diff --git a/docs/api/api_docs/classes/BitWidthConfig.html b/docs/api/api_docs/classes/BitWidthConfig.html index 34ff1f8af..83473d0f6 100644 --- a/docs/api/api_docs/classes/BitWidthConfig.html +++ b/docs/api/api_docs/classes/BitWidthConfig.html @@ -7,7 +7,7 @@ BitWidthConfig — MCT Documentation: ver 2.6.0 - + diff --git a/docs/api/api_docs/classes/DataGenerationConfig.html b/docs/api/api_docs/classes/DataGenerationConfig.html index 5390f34b1..764903e1b 100644 --- a/docs/api/api_docs/classes/DataGenerationConfig.html +++ b/docs/api/api_docs/classes/DataGenerationConfig.html @@ -7,7 +7,7 @@ Data Generation Configuration — MCT Documentation: ver 2.6.0 - + diff --git a/docs/api/api_docs/classes/DefaultDict.html b/docs/api/api_docs/classes/DefaultDict.html index 4626bc63b..65224dba7 100644 --- a/docs/api/api_docs/classes/DefaultDict.html +++ b/docs/api/api_docs/classes/DefaultDict.html @@ -7,7 +7,7 @@ DefaultDict Class — MCT Documentation: ver 2.6.0 - + @@ -60,16 +60,16 @@

Navigation

Get the value of the inner dictionary by the given key, If key is not in dictionary, it uses the default_factory to return a default value.

-
Return type:
-

Any

-
-
Parameters:
-

key – Key to use in inner dictionary.

+
Parameters:
+

key – Key to use in inner dictionary.

-
Returns:
-

Value of the inner dictionary by the given key, or a default value if not exist. +

Returns:
+

Value of the inner dictionary by the given key, or a default value if not exist. If default_factory was not passed at initialization, it returns None.

+
Return type:
+

Any

+
diff --git a/docs/api/api_docs/classes/FrameworkInfo.html b/docs/api/api_docs/classes/FrameworkInfo.html index a14f731d1..713055db3 100644 --- a/docs/api/api_docs/classes/FrameworkInfo.html +++ b/docs/api/api_docs/classes/FrameworkInfo.html @@ -7,7 +7,7 @@ FrameworkInfo Class — MCT Documentation: ver 2.6.0 - + @@ -66,7 +66,7 @@

Navigation

Examples

When quantizing a Keras model, if we want to quantize the kernels of Conv2D layers only, we can set, and we know it’s kernel out/in channel indices are (3, 2) respectivly:

-
>>> import tensorflow as tf
+
>>> import tensorflow as tf
 >>> kernel_ops = [tf.keras.layers.Conv2D]
 >>> kernel_channels_mapping = DefaultDict({tf.keras.layers.Conv2D: (3,2)})
 
diff --git a/docs/api/api_docs/classes/GradientPTQConfig.html b/docs/api/api_docs/classes/GradientPTQConfig.html index f8c3485cc..c31461a72 100644 --- a/docs/api/api_docs/classes/GradientPTQConfig.html +++ b/docs/api/api_docs/classes/GradientPTQConfig.html @@ -7,7 +7,7 @@ GradientPTQConfig Class — MCT Documentation: ver 2.6.0 - + diff --git a/docs/api/api_docs/classes/MixedPrecisionQuantizationConfig.html b/docs/api/api_docs/classes/MixedPrecisionQuantizationConfig.html index 8c2dfca9d..7ddeea6c4 100644 --- a/docs/api/api_docs/classes/MixedPrecisionQuantizationConfig.html +++ b/docs/api/api_docs/classes/MixedPrecisionQuantizationConfig.html @@ -7,7 +7,7 @@ MixedPrecisionQuantizationConfig — MCT Documentation: ver 2.6.0 - + diff --git a/docs/api/api_docs/classes/PruningConfig.html b/docs/api/api_docs/classes/PruningConfig.html index 1abe2e370..aeb06f672 100644 --- a/docs/api/api_docs/classes/PruningConfig.html +++ b/docs/api/api_docs/classes/PruningConfig.html @@ -7,7 +7,7 @@ Pruning Configuration — MCT Documentation: ver 2.6.0 - + diff --git a/docs/api/api_docs/classes/PruningInfo.html b/docs/api/api_docs/classes/PruningInfo.html index b66597303..962091a6d 100644 --- a/docs/api/api_docs/classes/PruningInfo.html +++ b/docs/api/api_docs/classes/PruningInfo.html @@ -7,7 +7,7 @@ Pruning Information — MCT Documentation: ver 2.6.0 - + @@ -65,6 +65,9 @@

Navigation

Return type:

Dict[BaseNode, np.ndarray]

+
Return type:
+

Dict[BaseNode, ndarray]

+
@@ -79,6 +82,9 @@

Navigation

Return type:

Dict[BaseNode, np.ndarray]

+
Return type:
+

Dict[BaseNode, ndarray]

+
diff --git a/docs/api/api_docs/classes/QuantizationConfig.html b/docs/api/api_docs/classes/QuantizationConfig.html index dfc3ab3a5..4eab2f6ad 100644 --- a/docs/api/api_docs/classes/QuantizationConfig.html +++ b/docs/api/api_docs/classes/QuantizationConfig.html @@ -7,7 +7,7 @@ QuantizationConfig — MCT Documentation: ver 2.6.0 - + @@ -50,7 +50,7 @@

Navigation

activations using thresholds, with weight threshold selection based on MSE and activation threshold selection using NOCLIPPING (min/max), while enabling relu_bound_to_power_of_2 and weights_bias_correction, you can instantiate a quantization configuration like this:

-
>>> import model_compression_toolkit as mct
+
>>> import model_compression_toolkit as mct
 >>> qc = mct.core.QuantizationConfig(activation_error_method=mct.core.QuantizationErrorMethod.NOCLIPPING, weights_error_method=mct.core.QuantizationErrorMethod.MSE, relu_bound_to_power_of_2=True, weights_bias_correction=True)
 
diff --git a/docs/api/api_docs/classes/QuantizationErrorMethod.html b/docs/api/api_docs/classes/QuantizationErrorMethod.html index f6685e620..a3d3b092a 100644 --- a/docs/api/api_docs/classes/QuantizationErrorMethod.html +++ b/docs/api/api_docs/classes/QuantizationErrorMethod.html @@ -7,7 +7,7 @@ QuantizationErrorMethod — MCT Documentation: ver 2.6.0 - + diff --git a/docs/api/api_docs/classes/ResourceUtilization.html b/docs/api/api_docs/classes/ResourceUtilization.html index 9e4ea601c..9c0fe05c3 100644 --- a/docs/api/api_docs/classes/ResourceUtilization.html +++ b/docs/api/api_docs/classes/ResourceUtilization.html @@ -7,7 +7,7 @@ ResourceUtilization — MCT Documentation: ver 2.6.0 - + diff --git a/docs/api/api_docs/classes/Wrapper.html b/docs/api/api_docs/classes/Wrapper.html index 36a729fc8..198ef4b24 100644 --- a/docs/api/api_docs/classes/Wrapper.html +++ b/docs/api/api_docs/classes/Wrapper.html @@ -7,7 +7,7 @@ wrapper — MCT Documentation: ver 2.6.0 - + @@ -57,11 +57,8 @@

Navigation

quantize_and_export(float_model, representative_dataset, framework='pytorch', method='PTQ', use_mixed_precision=False, param_items=None)

Main function to perform model quantization and export.

-
Return type:
-

Tuple[bool, Any]

-
-
Parameters:
-
    +
    Parameters:
    +
    • float_model – The float model to be quantized.

    • representative_dataset (Callable, np.array, tf.Tensor) – Representative dataset for calibration.

    • framework (str) – ‘tensorflow’ or ‘pytorch’. @@ -74,13 +71,13 @@

      Navigation

      [[key,value],…]. Default: None

    -
    Returns:
    -

    tuple (quantization success flag, quantized model)

    +
    Returns:
    +

    tuple (quantization success flag, quantized model)

Examples

Import MCT

-
>>> import model_compression_toolkit as mct
+
>>> import model_compression_toolkit as mct
 

Prepare the float model and dataset

@@ -345,6 +342,11 @@

Navigation

+
+
Return type:
+

Tuple[bool, Any]

+
+
diff --git a/docs/api/api_docs/classes/XQuantConfig.html b/docs/api/api_docs/classes/XQuantConfig.html index 68b667bf7..19fc4fbef 100644 --- a/docs/api/api_docs/classes/XQuantConfig.html +++ b/docs/api/api_docs/classes/XQuantConfig.html @@ -7,7 +7,7 @@ XQuant Configuration — MCT Documentation: ver 2.6.0 - + diff --git a/docs/api/api_docs/index.html b/docs/api/api_docs/index.html index 7165a2c56..5c7693b1c 100644 --- a/docs/api/api_docs/index.html +++ b/docs/api/api_docs/index.html @@ -7,7 +7,7 @@ API Docs — MCT Documentation: ver 2.6.0 - + @@ -45,7 +45,7 @@

Navigation

API Docs

Init module for MCT API.

-
import model_compression_toolkit as mct
+
import model_compression_toolkit as mct
 
diff --git a/docs/api/api_docs/methods/get_keras_data_generation_config.html b/docs/api/api_docs/methods/get_keras_data_generation_config.html index 394e33802..88918209e 100644 --- a/docs/api/api_docs/methods/get_keras_data_generation_config.html +++ b/docs/api/api_docs/methods/get_keras_data_generation_config.html @@ -7,7 +7,7 @@ Get DataGenerationConfig for Keras Models — MCT Documentation: ver 2.6.0 - + @@ -45,11 +45,8 @@

Navigation

model_compression_toolkit.data_generation.get_keras_data_generation_config(n_iter=DEFAULT_N_ITER, optimizer=Adam, data_gen_batch_size=DEFAULT_DATA_GEN_BS, initial_lr=DEFAULT_KERAS_INITIAL_LR, output_loss_multiplier=DEFAULT_KERAS_OUTPUT_LOSS_MULTIPLIER, scheduler_type=SchedulerType.REDUCE_ON_PLATEAU, bn_alignment_loss_type=BatchNormAlignemntLossType.L2_SQUARE, output_loss_type=OutputLossType.REGULARIZED_MIN_MAX_DIFF, data_init_type=DataInitType.Gaussian, layer_weighting_type=BNLayerWeightingType.AVERAGE, image_granularity=ImageGranularity.BatchWise, image_pipeline_type=ImagePipelineType.SMOOTHING_AND_AUGMENTATION, image_normalization_type=ImageNormalizationType.KERAS_APPLICATIONS, extra_pixels=DEFAULT_KERAS_EXTRA_PIXELS, bn_layer_types=[BatchNormalization], image_clipping=False)

Function to create a DataGenerationConfig object with the specified configuration parameters.

-
Return type:
-

DataGenerationConfig

-
-
Parameters:
-
    +
    Parameters:
    +
    • n_iter (int) – Number of iterations for the data generation process.

    • optimizer (Optimizer) – The optimizer to use for the data generation process.

    • data_gen_batch_size (int) – Batch size for data generation.

    • @@ -68,11 +65,14 @@

      Navigation

    • image_clipping (bool) – Whether to clip images during optimization.

    -
    Returns:
    -

    Data generation configuration object.

    +
    Returns:
    +

    Data generation configuration object.

    +
    +
    Return type:
    +

    DataGenerationConfig

    Return type:
    -

    DataGenerationConfig

    +

    DataGenerationConfig

diff --git a/docs/api/api_docs/methods/get_keras_gptq_config.html b/docs/api/api_docs/methods/get_keras_gptq_config.html index bde134fb8..36099273c 100644 --- a/docs/api/api_docs/methods/get_keras_gptq_config.html +++ b/docs/api/api_docs/methods/get_keras_gptq_config.html @@ -7,7 +7,7 @@ Get GradientPTQConfig for Keras Models — MCT Documentation: ver 2.6.0 - + @@ -45,11 +45,8 @@

Navigation

model_compression_toolkit.gptq.get_keras_gptq_config(n_epochs, optimizer=None, optimizer_rest=None, loss=None, log_function=None, use_hessian_based_weights=True, regularization_factor=None, hessian_batch_size=ACT_HESSIAN_DEFAULT_BATCH_SIZE, use_hessian_sample_attention=True, gradual_activation_quantization=True)

Create a GradientPTQConfig instance for Keras models.

-
Return type:
-

GradientPTQConfig

-
-
Parameters:
-
    +
    Parameters:
    +
    • n_epochs (int) – Number of epochs for running the representative dataset for fine-tuning.

    • optimizer (OptimizerV2) – Keras optimizer to use for fine-tuning for auxiliary variable. Default: Adam(learning rate set to 3e-2).

    • optimizer_rest (OptimizerV2) – Keras optimizer to use for fine-tuning of the bias variable. Default: Adam(learning rate set to 1e-4).

    • @@ -62,14 +59,14 @@

      Navigation

    • gradual_activation_quantization (bool, GradualActivationQuantizationConfig) – If False, GradualActivationQuantization is disabled. If True, GradualActivationQuantization is enabled with the default settings. GradualActivationQuantizationConfig object can be passed to use non-default settings.

    -
    Returns:
    -

    a GradientPTQConfig object to use when fine-tuning the quantized model using gptq.

    +
    Returns:
    +

    a GradientPTQConfig object to use when fine-tuning the quantized model using gptq.

Examples

Import MCT and TensorFlow:

-
>>> import model_compression_toolkit as mct
->>> import tensorflow as tf
+
>>> import model_compression_toolkit as mct
+>>> import tensorflow as tf
 

Create a GradientPTQConfig to run for 5 epochs:

@@ -81,6 +78,11 @@

Navigation

The configuration can be passed to keras_gradient_post_training_quantization() in order to quantize a keras model using gptq.

+
+
Return type:
+

GradientPTQConfig

+
+
diff --git a/docs/api/api_docs/methods/get_pytorch_data_generation_config.html b/docs/api/api_docs/methods/get_pytorch_data_generation_config.html index 9bd99a6b2..dae05e83c 100644 --- a/docs/api/api_docs/methods/get_pytorch_data_generation_config.html +++ b/docs/api/api_docs/methods/get_pytorch_data_generation_config.html @@ -7,7 +7,7 @@ Get DataGenerationConfig for Pytorch Models — MCT Documentation: ver 2.6.0 - + @@ -45,11 +45,8 @@

Navigation

model_compression_toolkit.data_generation.get_pytorch_data_generation_config(n_iter=DEFAULT_N_ITER, optimizer=RAdam, data_gen_batch_size=DEFAULT_DATA_GEN_BS, initial_lr=DEFAULT_PYTORCH_INITIAL_LR, output_loss_multiplier=DEFAULT_PYTORCH_OUTPUT_LOSS_MULTIPLIER, scheduler_type=SchedulerType.REDUCE_ON_PLATEAU_WITH_RESET, bn_alignment_loss_type=BatchNormAlignemntLossType.L2_SQUARE, output_loss_type=OutputLossType.NEGATIVE_MIN_MAX_DIFF, data_init_type=DataInitType.Gaussian, layer_weighting_type=BNLayerWeightingType.AVERAGE, image_granularity=ImageGranularity.AllImages, image_pipeline_type=ImagePipelineType.SMOOTHING_AND_AUGMENTATION, image_normalization_type=ImageNormalizationType.TORCHVISION, extra_pixels=DEFAULT_PYTORCH_EXTRA_PIXELS, bn_layer_types=DEFAULT_PYTORCH_BN_LAYER_TYPES, last_layer_types=DEFAULT_PYTORCH_LAST_LAYER_TYPES, image_clipping=True)

Function to create a DataGenerationConfig object with the specified configuration parameters.

-
Return type:
-

DataGenerationConfig

-
-
Parameters:
-
    +
    Parameters:
    +
    • n_iter (int) – Number of iterations for the data generation process.

    • optimizer (Optimizer) – The optimizer to use for the data generation process.

    • data_gen_batch_size (int) – Batch size for data generation.

    • @@ -69,11 +66,14 @@

      Navigation

    • image_clipping (bool) – Whether to clip images during optimization.

    -
    Returns:
    -

    Data generation configuration object.

    +
    Returns:
    +

    Data generation configuration object.

    +
    +
    Return type:
    +

    DataGenerationConfig

    Return type:
    -

    DataGenerationConfig

    +

    DataGenerationConfig

diff --git a/docs/api/api_docs/methods/get_pytroch_gptq_config.html b/docs/api/api_docs/methods/get_pytroch_gptq_config.html index 4b6893053..a27547800 100644 --- a/docs/api/api_docs/methods/get_pytroch_gptq_config.html +++ b/docs/api/api_docs/methods/get_pytroch_gptq_config.html @@ -7,7 +7,7 @@ Get GradientPTQConfig for Pytorch Models — MCT Documentation: ver 2.6.0 - + @@ -45,11 +45,8 @@

Navigation

model_compression_toolkit.gptq.get_pytorch_gptq_config(n_epochs, optimizer=None, optimizer_rest=None, loss=None, log_function=None, use_hessian_based_weights=True, regularization_factor=None, hessian_batch_size=ACT_HESSIAN_DEFAULT_BATCH_SIZE, use_hessian_sample_attention=True, gradual_activation_quantization=True)

Create a GradientPTQConfig instance for Pytorch models.

-
Return type:
-

GradientPTQConfig

-
-
Parameters:
-
    +
    Parameters:
    +
    • n_epochs (int) – Number of epochs for running the representative dataset for fine-tuning.

    • optimizer (Optimizer) – Pytorch optimizer to use for fine-tuning for auxiliary variable. Default: Adam(learning rate set to 3e-2).

    • optimizer_rest (Optimizer) – Pytorch optimizer to use for fine-tuning of the bias variable. Default: Adam(learning rate set to 1e-4).

    • @@ -62,22 +59,27 @@

      Navigation

    • gradual_activation_quantization (bool, GradualActivationQuantizationConfig) – If False, GradualActivationQuantization is disabled. If True, GradualActivationQuantization is enabled with the default settings. GradualActivationQuantizationConfig object can be passed to use non-default settings.

    -
    Returns:
    -

    a GradientPTQConfig object to use when fine-tuning the quantized model using gptq.

    +
    Returns:
    +

    a GradientPTQConfig object to use when fine-tuning the quantized model using gptq.

Examples

Import MCT and Create a GradientPTQConfig to run for 5 epochs:

-
>>> import model_compression_toolkit as mct
+
>>> import model_compression_toolkit as mct
 >>> gptq_conf = mct.gptq.get_pytorch_gptq_config(n_epochs=5)
 

Other PyTorch optimizers can be passed with dummy params:

-
>>> import torch
+
>>> import torch
 >>> gptq_conf = mct.gptq.get_pytorch_gptq_config(n_epochs=3, optimizer=torch.optim.Adam([torch.Tensor(1)]))
 

The configuration can be passed to pytorch_gradient_post_training_quantization() in order to quantize a pytorch model using gptq.

+
+
Return type:
+

GradientPTQConfig

+
+
diff --git a/docs/api/api_docs/methods/get_target_platform_capabilities.html b/docs/api/api_docs/methods/get_target_platform_capabilities.html index d981a4e14..e846a07c3 100644 --- a/docs/api/api_docs/methods/get_target_platform_capabilities.html +++ b/docs/api/api_docs/methods/get_target_platform_capabilities.html @@ -7,7 +7,7 @@ Get TargetPlatformCapabilities for tpc version — MCT Documentation: ver 2.6.0 - + @@ -45,17 +45,17 @@

Navigation

model_compression_toolkit.get_target_platform_capabilities(tpc_version=TPC_V1_0, device_type=IMX500_TP_MODEL)

Retrieves target platform capabilities model based on tpc version and the specified device type.

-
Return type:
-

TargetPlatformCapabilities

-
-
Parameters:
-
    +
    Parameters:
    +
    • tpc_version (str) – Target platform capabilities version.

    • device_type (str) – The type of device for the target platform.

    -
    Returns:
    -

    The TargetPlatformCapabilities object matching the tpc version.

    +
    Returns:
    +

    The TargetPlatformCapabilities object matching the tpc version.

    +
    +
    Return type:
    +

    TargetPlatformCapabilities

diff --git a/docs/api/api_docs/methods/get_target_platform_capabilities_sdsp.html b/docs/api/api_docs/methods/get_target_platform_capabilities_sdsp.html index 8ab54b9ae..55a38f3f7 100644 --- a/docs/api/api_docs/methods/get_target_platform_capabilities_sdsp.html +++ b/docs/api/api_docs/methods/get_target_platform_capabilities_sdsp.html @@ -7,7 +7,7 @@ Get TargetPlatformCapabilities for sdsp converter version — MCT Documentation: ver 2.6.0 - + @@ -45,14 +45,14 @@

Navigation

model_compression_toolkit.get_target_platform_capabilities_sdsp(sdsp_version=SDSP_V3_14)

Retrieves target platform capabilities model based on sdsp converter version.

-
Return type:
-

TargetPlatformCapabilities

+
Parameters:
+

sdsp_version (str) – Sdsp converter version.

-
Parameters:
-

sdsp_version (str) – Sdsp converter version.

+
Returns:
+

The TargetPlatformCapabilities object matching the sdsp converter version.

-
Returns:
-

The TargetPlatformCapabilities object matching the sdsp converter version.

+
Return type:
+

TargetPlatformCapabilities

diff --git a/docs/api/api_docs/methods/keras_data_generation_experimental.html b/docs/api/api_docs/methods/keras_data_generation_experimental.html index 3ecd40705..8a77338b8 100644 --- a/docs/api/api_docs/methods/keras_data_generation_experimental.html +++ b/docs/api/api_docs/methods/keras_data_generation_experimental.html @@ -7,7 +7,7 @@ Keras Data Generation — MCT Documentation: ver 2.6.0 - + @@ -45,30 +45,27 @@

Navigation

model_compression_toolkit.data_generation.keras_data_generation_experimental(model, n_images, output_image_size, data_generation_config)

Function to perform data generation using the provided Keras model and data generation configuration.

-
Return type:
-

Tensor

-
-
Parameters:
-
    +
    Parameters:
    +
    • model (Model) – Keras model to generate data for.

    • n_images (int) – Number of images to generate.

    • output_image_size (Union[int, Tuple[int, int]]) – Size of the output images.

    • data_generation_config (DataGenerationConfig) – Configuration for data generation.

    -
    Returns:
    -

    Finalized list containing generated images.

    +
    Returns:
    +

    Finalized list containing generated images.

    -
    Return type:
    -

    List[tf.Tensor]

    +
    Return type:
    +

    List[tf.Tensor]

Examples

In this example, we’ll walk through generating images using a simple Keras model and a data generation configuration. The process involves creating a model, setting up a data generation configuration, and finally generating images with specified parameters.

Start by importing the Model Compression Toolkit (MCT), TensorFlow, and some layers from tensorflow.keras:

-
>>> import model_compression_toolkit as mct
->>> from tensorflow.keras.models import Sequential
->>> from tensorflow.keras.layers import Conv2D, BatchNormalization, Flatten, Dense, Reshape
+
>>> import model_compression_toolkit as mct
+>>> from tensorflow.keras.models import Sequential
+>>> from tensorflow.keras.layers import Conv2D, BatchNormalization, Flatten, Dense, Reshape
 

Next, define a simple Keras model:

@@ -86,6 +83,11 @@

Navigation

The generated images can then be used for various purposes, such as data-free quantization.

+
+
Return type:
+

Tensor

+
+
diff --git a/docs/api/api_docs/methods/keras_gradient_post_training_quantization.html b/docs/api/api_docs/methods/keras_gradient_post_training_quantization.html index c87c98636..c80b34b77 100644 --- a/docs/api/api_docs/methods/keras_gradient_post_training_quantization.html +++ b/docs/api/api_docs/methods/keras_gradient_post_training_quantization.html @@ -7,7 +7,7 @@ Keras Gradient Based Post Training Quantization — MCT Documentation: ver 2.6.0 - + @@ -58,11 +58,8 @@

Navigation

training quantization by comparing points between the float and quantized models, and minimizing the observed loss.

-
Return type:
-

Tuple[Model, Optional[UserInformation]]

-
-
Parameters:
-
    +
    Parameters:
    +
    • in_model (Model) – Keras model to quantize.

    • representative_data_gen (Callable) – Dataset used for calibration.

    • gptq_config (GradientPTQConfig) – Configuration for using gptq (e.g. optimizer).

    • @@ -72,21 +69,21 @@

      Navigation

    • target_platform_capabilities (Union[TargetPlatformCapabilities, str]) – TargetPlatformCapabilities to optimize the Keras model according to.

    -
    Returns:
    -

    A quantized model and information the user may need to handle the quantized model.

    +
    Returns:
    +

    A quantized model and information the user may need to handle the quantized model.

Examples

Import a Keras model:

-
>>> from tensorflow.keras.applications.mobilenet import MobileNet
+
>>> from tensorflow.keras.applications.mobilenet import MobileNet
 >>> model = MobileNet()
 

Create a random dataset generator, for required number of calibration iterations (num_calibration_batches): In this example a random dataset of 10 batches each containing 4 images is used.

-
>>> import numpy as np
+
>>> import numpy as np
 >>> num_calibration_batches = 10
->>> def repr_datagen():
+>>> def repr_datagen():
 >>>     for _ in range(num_calibration_batches):
 >>>         yield [np.random.random((4, 224, 224, 3))]
 
@@ -116,6 +113,11 @@

Navigation

>>> quantized_model, quantization_info = mct.gptq.keras_gradient_post_training_quantization(model, repr_datagen, gptq_config, target_resource_utilization=ru, core_config=config)
 
+
+
Return type:
+

Tuple[Model, Optional[UserInformation]]

+
+
diff --git a/docs/api/api_docs/methods/keras_kpi_data.html b/docs/api/api_docs/methods/keras_kpi_data.html index 3bb212d12..b9d168cc8 100644 --- a/docs/api/api_docs/methods/keras_kpi_data.html +++ b/docs/api/api_docs/methods/keras_kpi_data.html @@ -7,7 +7,7 @@ Get Resource Utilization information for Keras Models — MCT Documentation: ver 2.6.0 - + @@ -48,37 +48,39 @@

Navigation

Builds the computation graph from the given model and hw modeling, and uses it to compute the resource utilization data.

-
Return type:
-

ResourceUtilization

-
-
Parameters:
-
    +
    Parameters:
    +
    • in_model (Model) – Keras model to quantize.

    • representative_data_gen (Callable) – Dataset used for calibration.

    • core_config (CoreConfig) – CoreConfig containing parameters for quantization and mixed precision of how the model should be quantized.

    • target_platform_capabilities (Union[TargetPlatformCapabilities, str]) – FrameworkQuantizationCapabilities to optimize the Keras model according to.

    -
    Returns:
    -

    A ResourceUtilization object with total weights parameters sum and max activation tensor.

    +
    Returns:
    +

    A ResourceUtilization object with total weights parameters sum and max activation tensor.

Examples

Import a Keras model:

-
>>> from tensorflow.keras.applications.mobilenet import MobileNet
+
>>> from tensorflow.keras.applications.mobilenet import MobileNet
 >>> model = MobileNet()
 

Create a random dataset generator:

-
>>> import numpy as np
->>> def repr_datagen(): yield [np.random.random((1, 224, 224, 3))]
+
>>> import numpy as np
+>>> def repr_datagen(): yield [np.random.random((1, 224, 224, 3))]
 

Import MCT and call for resource utilization data calculation:

-
>>> import model_compression_toolkit as mct
+
>>> import model_compression_toolkit as mct
 >>> ru_data = mct.core.keras_resource_utilization_data(model, repr_datagen)
 
+
+
Return type:
+

ResourceUtilization

+
+
diff --git a/docs/api/api_docs/methods/keras_load_quantizad_model.html b/docs/api/api_docs/methods/keras_load_quantizad_model.html index ed3b04e9a..8397bdd66 100644 --- a/docs/api/api_docs/methods/keras_load_quantizad_model.html +++ b/docs/api/api_docs/methods/keras_load_quantizad_model.html @@ -7,7 +7,7 @@ Load Quantized Keras Model — MCT Documentation: ver 2.6.0 - + diff --git a/docs/api/api_docs/methods/keras_post_training_quantization.html b/docs/api/api_docs/methods/keras_post_training_quantization.html index 15ce8354b..b42467913 100644 --- a/docs/api/api_docs/methods/keras_post_training_quantization.html +++ b/docs/api/api_docs/methods/keras_post_training_quantization.html @@ -7,7 +7,7 @@ Keras Post Training Quantization — MCT Documentation: ver 2.6.0 - + @@ -55,11 +55,8 @@

Navigation

In order to limit the maximal model’s size, a target ResourceUtilization need to be passed after weights_memory is set (in bytes).

-
Return type:
-

Tuple[Model, Optional[UserInformation]]

-
-
Parameters:
-
    +
    Parameters:
    +
    • in_model (Model) – Keras model to quantize.

    • representative_data_gen (Callable) – Dataset used for calibration.

    • target_resource_utilization (ResourceUtilization) – ResourceUtilization object to limit the search of the mixed-precision configuration as desired.

    • @@ -67,25 +64,25 @@

      Navigation

    • target_platform_capabilities (Union[TargetPlatformCapabilities, str]) – TargetPlatformCapabilities to optimize the Keras model according to.

    -
    Returns:
    -

    A quantized model and information the user may need to handle the quantized model.

    +
    Returns:
    +

    A quantized model and information the user may need to handle the quantized model.

Examples

Import MCT:

-
>>> import model_compression_toolkit as mct
+
>>> import model_compression_toolkit as mct
 

Import a Keras model:

-
>>> from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2
+
>>> from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2
 >>> model = MobileNetV2()
 

Create a random dataset generator, for required number of calibration iterations (num_calibration_batches): In this example a random dataset of 10 batches each containing 4 images is used.

-
>>> import numpy as np
+
>>> import numpy as np
 >>> num_calibration_batches = 10
->>> def repr_datagen():
+>>> def repr_datagen():
 >>>     for _ in range(num_calibration_batches):
 >>>         yield [np.random.random((4, 224, 224, 3))]
 
@@ -113,6 +110,11 @@

Navigation

For more configuration options, please take a look at our API documentation.

+
+
Return type:
+

Tuple[Model, Optional[UserInformation]]

+
+
diff --git a/docs/api/api_docs/methods/keras_pruning_experimental.html b/docs/api/api_docs/methods/keras_pruning_experimental.html index be1cb8550..4732e318a 100644 --- a/docs/api/api_docs/methods/keras_pruning_experimental.html +++ b/docs/api/api_docs/methods/keras_pruning_experimental.html @@ -7,7 +7,7 @@ Keras Structured Pruning — MCT Documentation: ver 2.6.0 - + @@ -53,11 +53,8 @@

Navigation

identify groups of channels that can be removed with minimal impact on performance.

Notice that the pruned model must be retrained to recover the compressed model’s performance.

-
Return type:
-

Tuple[Model, PruningInfo]

-
-
Parameters:
-
    +
    Parameters:
    +
    • model (Model) – The original Keras model to be pruned.

    • target_resource_utilization (ResourceUtilization) – The target Key Performance Indicators to be achieved through pruning.

    • representative_data_gen (Callable) – A function to generate representative data for pruning analysis.

    • @@ -65,11 +62,11 @@

      Navigation

    • target_platform_capabilities (Union[TargetPlatformCapabilities, str]) – Platform-specific constraints and capabilities. Defaults to DEFAULT_KERAS_TPC.

    -
    Returns:
    -

    A tuple containing the pruned Keras model and associated pruning information.

    +
    Returns:
    +

    A tuple containing the pruned Keras model and associated pruning information.

    -
    Return type:
    -

    Tuple[Model, PruningInfo]

    +
    Return type:
    +

    Tuple[Model, PruningInfo]

@@ -78,17 +75,17 @@

Navigation

Examples

Import MCT:

-
>>> import model_compression_toolkit as mct
+
>>> import model_compression_toolkit as mct
 

Import a Keras model:

-
>>> from tensorflow.keras.applications.resnet50 import ResNet50
+
>>> from tensorflow.keras.applications.resnet50 import ResNet50
 >>> model = ResNet50()
 

Create a random dataset generator:

-
>>> import numpy as np
->>> def repr_datagen(): yield [np.random.random((1, 224, 224, 3))]
+
>>> import numpy as np
+>>> def repr_datagen(): yield [np.random.random((1, 224, 224, 3))]
 

Define a target resource utilization for pruning. @@ -109,6 +106,11 @@

Navigation

>>> pruned_model, pruning_info = mct.pruning.keras_pruning_experimental(model=model, target_resource_utilization=target_resource_utilization, representative_data_gen=repr_datagen, pruning_config=pruning_config)
 
+
+
Return type:
+

Tuple[Model, PruningInfo]

+
+
diff --git a/docs/api/api_docs/methods/keras_quantization_aware_training_finalize_experimental.html b/docs/api/api_docs/methods/keras_quantization_aware_training_finalize_experimental.html index a468b0226..4f7c0129c 100644 --- a/docs/api/api_docs/methods/keras_quantization_aware_training_finalize_experimental.html +++ b/docs/api/api_docs/methods/keras_quantization_aware_training_finalize_experimental.html @@ -7,7 +7,7 @@ Keras Quantization Aware Training Model Finalize — MCT Documentation: ver 2.6.0 - + @@ -45,29 +45,26 @@

Navigation

model_compression_toolkit.qat.keras_quantization_aware_training_finalize_experimental(in_model)

Convert a model fine-tuned by the user (Trainable quantizers) to a model with Inferable quantizers.

-
Return type:
-

Model

-
-
Parameters:
-

in_model (Model) – Keras model to replace TrainableQuantizer with InferableQuantizer

+
Parameters:
+

in_model (Model) – Keras model to replace TrainableQuantizer with InferableQuantizer

-
Returns:
-

A quantized model with Inferable quantizers

+
Returns:
+

A quantized model with Inferable quantizers

Examples

Import MCT:

-
>>> import model_compression_toolkit as mct
+
>>> import model_compression_toolkit as mct
 

Import a Keras model:

-
>>> from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2
+
>>> from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2
 >>> model = MobileNetV2()
 

Create a random dataset generator:

-
>>> import numpy as np
->>> def repr_datagen(): yield [np.random.random((1, 224, 224, 3))]
+
>>> import numpy as np
+>>> def repr_datagen(): yield [np.random.random((1, 224, 224, 3))]
 

Create a MCT core config, containing the quantization configuration:

@@ -96,6 +93,11 @@

Navigation

>>> quantized_model = mct.qat.keras_quantization_aware_training_finalize_experimental(quantized_model)
+
+
Return type:
+

Model

+
+
diff --git a/docs/api/api_docs/methods/keras_quantization_aware_training_init_experimental.html b/docs/api/api_docs/methods/keras_quantization_aware_training_init_experimental.html index c915dc2b8..6c0ff4112 100644 --- a/docs/api/api_docs/methods/keras_quantization_aware_training_init_experimental.html +++ b/docs/api/api_docs/methods/keras_quantization_aware_training_init_experimental.html @@ -7,7 +7,7 @@ Keras Quantization Aware Training Model Init — MCT Documentation: ver 2.6.0 - + @@ -75,19 +75,19 @@

Navigation

Examples

Import MCT:

-
>>> import model_compression_toolkit as mct
+
>>> import model_compression_toolkit as mct
 

Import a Keras model:

-
>>> from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2
+
>>> from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2
 >>> model = MobileNetV2()
 

Create a random dataset generator, for required number of calibration iterations (num_calibration_batches): In this example a random dataset of 10 batches each containing 4 images is used.

-
>>> import numpy as np
+
>>> import numpy as np
 >>> num_calibration_batches = 10
->>> def repr_datagen():
+>>> def repr_datagen():
 >>>     for _ in range(num_calibration_batches):
 >>>         yield [np.random.random((4, 224, 224, 3))]
 
diff --git a/docs/api/api_docs/methods/pytorch_data_generation_experimental.html b/docs/api/api_docs/methods/pytorch_data_generation_experimental.html index ba62ecb96..990280a9e 100644 --- a/docs/api/api_docs/methods/pytorch_data_generation_experimental.html +++ b/docs/api/api_docs/methods/pytorch_data_generation_experimental.html @@ -7,7 +7,7 @@ Pytorch Data Generation — MCT Documentation: ver 2.6.0 - + @@ -45,30 +45,27 @@

Navigation

model_compression_toolkit.data_generation.pytorch_data_generation_experimental(model, n_images, output_image_size, data_generation_config)

Function to perform data generation using the provided model and data generation configuration.

-
Return type:
-

List[Tensor]

-
-
Parameters:
-
    +
    Parameters:
    +
    • model (Module) – PyTorch model to generate data for.

    • n_images (int) – Number of images to generate.

    • output_image_size (Union[int, Tuple[int, int]]) – The height and width size of the output images.

    • data_generation_config (DataGenerationConfig) – Configuration for data generation.

    -
    Returns:
    -

    Finalized list containing generated images.

    +
    Returns:
    +

    Finalized list containing generated images.

    -
    Return type:
    -

    List[Tensor]

    +
    Return type:
    +

    List[Tensor]

Examples

In this example, we’ll walk through generating images using a simple PyTorch model and a data generation configuration. The process involves creating a model, setting up a data generation configuration, and finally generating images with specified parameters.

Start by importing the Model Compression Toolkit (MCT), PyTorch, and some modules from torch.nn:

-
>>> import model_compression_toolkit as mct
->>> import torch.nn as nn
->>> from torch.nn import Conv2d, BatchNorm2d, Flatten, Linear
+
>>> import model_compression_toolkit as mct
+>>> import torch.nn as nn
+>>> from torch.nn import Conv2d, BatchNorm2d, Flatten, Linear
 

Next, define a simple PyTorch model:

@@ -86,6 +83,11 @@

Navigation

The generated images can then be used for various purposes, such as data-free quantization.

+
+
Return type:
+

List[Tensor]

+
+
diff --git a/docs/api/api_docs/methods/pytorch_gradient_post_training_quantization.html b/docs/api/api_docs/methods/pytorch_gradient_post_training_quantization.html index 572b6b545..4180ce4b4 100644 --- a/docs/api/api_docs/methods/pytorch_gradient_post_training_quantization.html +++ b/docs/api/api_docs/methods/pytorch_gradient_post_training_quantization.html @@ -7,7 +7,7 @@ Pytorch Gradient Based Post Training Quantization — MCT Documentation: ver 2.6.0 - + @@ -58,11 +58,8 @@

Navigation

training quantization by comparing points between the float and quantized models, and minimizing the observed loss.

-
Return type:
-

Tuple[Module, Optional[UserInformation]]

-
-
Parameters:
-
    +
    Parameters:
    +
    • model (Module) – Pytorch model to quantize.

    • representative_data_gen (Callable) – Dataset used for calibration.

    • target_resource_utilization (ResourceUtilization) – ResourceUtilization object to limit the search of the mixed-precision configuration as desired.

    • @@ -72,25 +69,25 @@

      Navigation

    • target_platform_capabilities (Union[TargetPlatformCapabilities, str]) – TargetPlatformCapabilities to optimize the PyTorch model according to.

    -
    Returns:
    -

    A quantized module and information the user may need to handle the quantized module.

    +
    Returns:
    +

    A quantized module and information the user may need to handle the quantized module.

Examples

Import Model Compression Toolkit:

-
>>> import model_compression_toolkit as mct
+
>>> import model_compression_toolkit as mct
 

Import a Pytorch module:

-
>>> from torchvision import models
+
>>> from torchvision import models
 >>> module = models.mobilenet_v2()
 

Create a random dataset generator, for required number of calibration iterations (num_calibration_batches): In this example a random dataset of 10 batches each containing 4 images is used.

-
>>> import numpy as np
+
>>> import numpy as np
 >>> num_calibration_batches = 10
->>> def repr_datagen():
+>>> def repr_datagen():
 >>>     for _ in range(num_calibration_batches):
 >>>         yield [np.random.random((4, 3, 224, 224))]
 
@@ -103,6 +100,11 @@

Navigation

>>> quantized_module, quantization_info = mct.gptq.pytorch_gradient_post_training_quantization(module, repr_datagen, core_config=config, gptq_config=gptq_conf)
 
+
+
Return type:
+

Tuple[Module, Optional[UserInformation]]

+
+
diff --git a/docs/api/api_docs/methods/pytorch_kpi_data.html b/docs/api/api_docs/methods/pytorch_kpi_data.html index a566b86d8..c6f2cc235 100644 --- a/docs/api/api_docs/methods/pytorch_kpi_data.html +++ b/docs/api/api_docs/methods/pytorch_kpi_data.html @@ -7,7 +7,7 @@ Get Resource Utilization information for PyTorch Models — MCT Documentation: ver 2.6.0 - + @@ -46,37 +46,39 @@

Navigation

Computes resource utilization data that can be used to calculate the desired target resource utilization for mixed-precision quantization. Builds the computation graph from the given model and target platform capabilities, and uses it to compute the resource utilization data.

-
Return type:
-

ResourceUtilization

-
-
Parameters:
-
    +
    Parameters:
    +
    • in_model (Model) – PyTorch model to quantize.

    • representative_data_gen (Callable) – Dataset used for calibration.

    • core_config (CoreConfig) – CoreConfig containing parameters for quantization and mixed precision

    • target_platform_capabilities (Union[TargetPlatformCapabilities, str]) – FrameworkQuantizationCapabilities to optimize the PyTorch model according to.

    -
    Returns:
    -

    A ResourceUtilization object with total weights parameters sum and max activation tensor.

    +
    Returns:
    +

    A ResourceUtilization object with total weights parameters sum and max activation tensor.

Examples

Import a Pytorch model:

-
>>> from torchvision import models
+
>>> from torchvision import models
 >>> module = models.mobilenet_v2()
 

Create a random dataset generator:

-
>>> import numpy as np
->>> def repr_datagen(): yield [np.random.random((1, 3, 224, 224))]
+
>>> import numpy as np
+>>> def repr_datagen(): yield [np.random.random((1, 3, 224, 224))]
 

Import mct and call for resource utilization data calculation:

-
>>> import model_compression_toolkit as mct
+
>>> import model_compression_toolkit as mct
 >>> ru_data = mct.core.pytorch_resource_utilization_data(module, repr_datagen)
 
+
+
Return type:
+

ResourceUtilization

+
+
diff --git a/docs/api/api_docs/methods/pytorch_post_training_quantization.html b/docs/api/api_docs/methods/pytorch_post_training_quantization.html index ac93413e2..0773fb24b 100644 --- a/docs/api/api_docs/methods/pytorch_post_training_quantization.html +++ b/docs/api/api_docs/methods/pytorch_post_training_quantization.html @@ -7,7 +7,7 @@ Pytorch Post Training Quantization — MCT Documentation: ver 2.6.0 - + @@ -55,11 +55,8 @@

Navigation

training quantization by comparing points between the float and quantized modules, and minimizing the observed loss.

-
Return type:
-

Tuple[Module, Optional[UserInformation]]

-
-
Parameters:
-
    +
    Parameters:
    +
    • in_module (Module) – Pytorch module to quantize.

    • representative_data_gen (Callable) – Dataset used for calibration.

    • target_resource_utilization (ResourceUtilization) – ResourceUtilization object to limit the search of the mixed-precision configuration as desired.

    • @@ -67,31 +64,36 @@

      Navigation

    • target_platform_capabilities (Union[TargetPlatformCapabilities, str]) – TargetPlatformCapabilities to optimize the PyTorch model according to.

    -
    Returns:
    -

    A quantized module and information the user may need to handle the quantized module.

    +
    Returns:
    +

    A quantized module and information the user may need to handle the quantized module.

Examples

Import a Pytorch module:

-
>>> from torchvision import models
+
>>> from torchvision import models
 >>> module = models.mobilenet_v2()
 

Create a random dataset generator, for required number of calibration iterations (num_calibration_batches): In this example a random dataset of 10 batches each containing 4 images is used.

-
>>> import numpy as np
+
>>> import numpy as np
 >>> num_calibration_batches = 10
->>> def repr_datagen():
+>>> def repr_datagen():
 >>>     for _ in range(num_calibration_batches):
 >>>         yield [np.random.random((4, 3, 224, 224))]
 

Import MCT and pass the module with the representative dataset generator to get a quantized module. Set number of calibration iterations to 1:

-
>>> import model_compression_toolkit as mct
+
>>> import model_compression_toolkit as mct
 >>> quantized_module, quantization_info = mct.ptq.pytorch_post_training_quantization(module, repr_datagen)
 
+
+
Return type:
+

Tuple[Module, Optional[UserInformation]]

+
+
diff --git a/docs/api/api_docs/methods/pytorch_pruning_experimental.html b/docs/api/api_docs/methods/pytorch_pruning_experimental.html index b4e43bc86..706ec4862 100644 --- a/docs/api/api_docs/methods/pytorch_pruning_experimental.html +++ b/docs/api/api_docs/methods/pytorch_pruning_experimental.html @@ -7,7 +7,7 @@ Pytorch Structured Pruning — MCT Documentation: ver 2.6.0 - + @@ -53,11 +53,8 @@

Navigation

identify groups of channels that can be removed with minimal impact on performance.

Notice that the pruned model must be retrained to recover the compressed model’s performance.

-
Return type:
-

Tuple[Module, PruningInfo]

-
-
Parameters:
-
    +
    Parameters:
    +
    • model (Module) – The PyTorch model to be pruned.

    • target_resource_utilization (ResourceUtilization) – Key Performance Indicators specifying the pruning targets.

    • representative_data_gen (Callable) – A function to generate representative data for pruning analysis.

    • @@ -66,11 +63,11 @@

      Navigation

      Defaults to DEFAULT_PYTORCH_TPC.

    -
    Returns:
    -

    A tuple containing the pruned Pytorch model and associated pruning information.

    +
    Returns:
    +

    A tuple containing the pruned Pytorch model and associated pruning information.

    -
    Return type:
    -

    Tuple[Model, PruningInfo]

    +
    Return type:
    +

    Tuple[Model, PruningInfo]

@@ -79,17 +76,17 @@

Navigation

Examples

Import MCT:

-
>>> import model_compression_toolkit as mct
+
>>> import model_compression_toolkit as mct
 

Import a Pytorch model:

-
>>> from torchvision.models import resnet50, ResNet50_Weights
+
>>> from torchvision.models import resnet50, ResNet50_Weights
 >>> model = resnet50(weights=ResNet50_Weights.IMAGENET1K_V1)
 

Create a random dataset generator:

-
>>> import numpy as np
->>> def repr_datagen(): yield [np.random.random((1, 3, 224, 224))]
+
>>> import numpy as np
+>>> def repr_datagen(): yield [np.random.random((1, 3, 224, 224))]
 

Define a target resource utilization for pruning. @@ -110,6 +107,11 @@

Navigation

>>> pruned_model, pruning_info = mct.pruning.pytorch_pruning_experimental(model=model, target_resource_utilization=target_resource_utilization, representative_data_gen=repr_datagen, pruning_config=pruning_config)
 
+
+
Return type:
+

Tuple[Module, PruningInfo]

+
+
diff --git a/docs/api/api_docs/methods/pytorch_quantization_aware_training_finalize_experimental.html b/docs/api/api_docs/methods/pytorch_quantization_aware_training_finalize_experimental.html index a8a81cf18..1365711fa 100644 --- a/docs/api/api_docs/methods/pytorch_quantization_aware_training_finalize_experimental.html +++ b/docs/api/api_docs/methods/pytorch_quantization_aware_training_finalize_experimental.html @@ -7,7 +7,7 @@ PyTorch Quantization Aware Training Model Finalize — MCT Documentation: ver 2.6.0 - + @@ -55,17 +55,17 @@

Navigation

Examples

Import MCT:

-
>>> import model_compression_toolkit as mct
+
>>> import model_compression_toolkit as mct
 

Import a Pytorch model:

-
>>> from torchvision.models import mobilenet_v2
+
>>> from torchvision.models import mobilenet_v2
 >>> model = mobilenet_v2(pretrained=True)
 

Create a random dataset generator:

-
>>> import numpy as np
->>> def repr_datagen(): yield [np.random.random((1, 224, 224, 3))]
+
>>> import numpy as np
+>>> def repr_datagen(): yield [np.random.random((1, 224, 224, 3))]
 

Create a MCT core config, containing the quantization configuration:

diff --git a/docs/api/api_docs/methods/pytorch_quantization_aware_training_init_experimental.html b/docs/api/api_docs/methods/pytorch_quantization_aware_training_init_experimental.html index c1e3a19f2..a58199df2 100644 --- a/docs/api/api_docs/methods/pytorch_quantization_aware_training_init_experimental.html +++ b/docs/api/api_docs/methods/pytorch_quantization_aware_training_init_experimental.html @@ -7,7 +7,7 @@ PyTorch Quantization Aware Training Model Init — MCT Documentation: ver 2.6.0 - + @@ -74,18 +74,18 @@

Navigation

Examples

Import MCT:

-
>>> import model_compression_toolkit as mct
+
>>> import model_compression_toolkit as mct
 

Import a Pytorch model:

-
>>> from torchvision.models import mobilenet_v2
+
>>> from torchvision.models import mobilenet_v2
 >>> model = mobilenet_v2(pretrained=True)
 

Create a random dataset generator, for required number of calibration iterations (num_calibration_batches). In this example, a random dataset of 10 batches each containing 4 images is used:

-
>>> import numpy as np
+
>>> import numpy as np
 >>> num_calibration_batches = 10
->>> def repr_datagen():
+>>> def repr_datagen():
 >>>     for _ in range(num_calibration_batches):
 >>>         yield [np.random.random((4, 3, 224, 224))]
 
diff --git a/docs/api/api_docs/methods/set_logger_path.html b/docs/api/api_docs/methods/set_logger_path.html index 66272e074..0f14537ca 100644 --- a/docs/api/api_docs/methods/set_logger_path.html +++ b/docs/api/api_docs/methods/set_logger_path.html @@ -7,7 +7,7 @@ Enable a Logger — MCT Documentation: ver 2.6.0 - + @@ -45,11 +45,8 @@

Navigation

model_compression_toolkit.set_log_folder(folder, level=logging.INFO)

Set a directory path for saving a log file.

-
Return type:
-

None

-
-
Parameters:
-
    +
    Parameters:
    +
    • folder (str) – Folder path to save the log file.

    • level (int) – Level of verbosity to set to the logger and handlers.

    @@ -61,6 +58,11 @@

    Navigation

    to set up logging.

    Don’t use Python’s original logger.

+
+
Return type:
+

None

+
+
diff --git a/docs/api/api_docs/methods/xquant_report_keras_experimental.html b/docs/api/api_docs/methods/xquant_report_keras_experimental.html index 2feee9bb7..65d9d733a 100644 --- a/docs/api/api_docs/methods/xquant_report_keras_experimental.html +++ b/docs/api/api_docs/methods/xquant_report_keras_experimental.html @@ -7,7 +7,7 @@ XQuant Report Keras — MCT Documentation: ver 2.6.0 - + @@ -45,11 +45,8 @@

Navigation

model_compression_toolkit.xquant.keras.facade_xquant_report.xquant_report_keras_experimental(float_model, quantized_model, repr_dataset, validation_dataset, xquant_config)

Generate an explainable quantization report for a quantized Keras model.

-
Return type:
-

Dict[str, Any]

-
-
Parameters:
-
    +
    Parameters:
    +
    • float_model (keras.Model) – The original floating-point Keras model.

    • quantized_model (keras.Model) – The quantized Keras model.

    • repr_dataset (Callable) – The representative dataset used during quantization for similarity metrics computation.

    • @@ -57,11 +54,14 @@

      Navigation

    • xquant_config (XQuantConfig) – Configuration settings for explainable quantization.

    -
    Returns:
    -

    A dictionary containing the collected similarity metrics and report data.

    +
    Returns:
    +

    A dictionary containing the collected similarity metrics and report data.

    +
    +
    Return type:
    +

    Dict[str, Any]

    Return type:
    -

    Dict[str, Any]

    +

    Dict[str, Any]

diff --git a/docs/api/api_docs/methods/xquant_report_pytorch_experimental.html b/docs/api/api_docs/methods/xquant_report_pytorch_experimental.html index 696d88e5e..8388913a1 100644 --- a/docs/api/api_docs/methods/xquant_report_pytorch_experimental.html +++ b/docs/api/api_docs/methods/xquant_report_pytorch_experimental.html @@ -7,7 +7,7 @@ XQuant Report Pytorch — MCT Documentation: ver 2.6.0 - + diff --git a/docs/api/api_docs/methods/xquant_report_troubleshoot_pytorch_experimental.html b/docs/api/api_docs/methods/xquant_report_troubleshoot_pytorch_experimental.html index 0f3b1c42b..6817c7f5e 100644 --- a/docs/api/api_docs/methods/xquant_report_troubleshoot_pytorch_experimental.html +++ b/docs/api/api_docs/methods/xquant_report_troubleshoot_pytorch_experimental.html @@ -7,7 +7,7 @@ XQuant Report Troubleshoot Pytorch — MCT Documentation: ver 2.6.0 - + diff --git a/docs/api/api_docs/modules/core_config.html b/docs/api/api_docs/modules/core_config.html index a114722ad..f83bf0b59 100644 --- a/docs/api/api_docs/modules/core_config.html +++ b/docs/api/api_docs/modules/core_config.html @@ -7,7 +7,7 @@ CoreConfig — MCT Documentation: ver 2.6.0 - + diff --git a/docs/api/api_docs/modules/debug_config.html b/docs/api/api_docs/modules/debug_config.html index 9cce28a75..56b140522 100644 --- a/docs/api/api_docs/modules/debug_config.html +++ b/docs/api/api_docs/modules/debug_config.html @@ -7,7 +7,7 @@ debug_config Module — MCT Documentation: ver 2.6.0 - + @@ -45,7 +45,7 @@

DebugConfig
-class model_compression_toolkit.core.DebugConfig(analyze_similarity=False, network_editor=<factory>, simulate_scheduler=False, bypass=False)
+class model_compression_toolkit.core.DebugConfig(analyze_similarity=False, network_editor=<factory>, simulate_scheduler=False, bypass=False, progress_info_callback=None)

A dataclass for MCT core debug information.

Parameters:
@@ -55,9 +55,85 @@

DebugConfigEditRule]) – A list of rules and actions to edit the network for quantization.

  • simulate_scheduler (bool) – Simulate scheduler behavior to compute operators’ order and cuts.

  • bypass (bool) – A flag to enable MCT bypass, which skips MCT runner and returns the input model unchanged.

  • +
  • progress_info_callback (Callable) – A user-defined callback function for retrieving progress information.

  • +

    About progress_info_callback

    +
    +

    The progress_info_callback parameter in DebugConfig enables the following features and allows users to retrieve progress information when a callback function is configured:

    +
      +
    • The callback function can receive MCT progress information.

    • +
    • A progress bar is displayed in the CUI, allowing users to visualize how much processing has been completed while MCT is running.

    • +
    +

    If no callback function is set, these features are disabled and the behavior and output remain unchanged. +Examples of how to create a callback function to enable these features are provided in the Examples section.

    +
    +

    Examples

    +

    Create a callable callback function. +When defining the callback, make sure it accepts a dictionary representing the current processing state as an argument.

    +

    Example 1: Use a class to keep track of the processing history.

    +
    >>> class ProgressInfoCallback:
    +...    def __init__(self):
    +...        self.history = []
    +...
    +...    def __call__(self, info):
    +...        current = info["currentComponent"]
    +...        total = info["totalComponents"]
    +...        component_name = info["completedComponents"]
    +...
    +...        self.history.append({
    +...            "component_name": component_name,
    +...            "current": current,
    +...            "total": total
    +...        })
    +...
    +>>> progress_info_callback = ProgressInfoCallback()
    +
    +
    +

    Example 2: Use a function to output the progress percentage and processing name to standard error (stderr).

    +
    >>> def progress_info_callback(info):
    +...    current = info["currentComponent"]
    +...    total = info["totalComponents"]
    +...    component_name = info["completedComponents"]
    +...
    +...    progress_percent = (current / total * 100.0)
    +...
    +...    print(f"[{current}/{total}] {progress_percent:6.2f}% {component_name}",
    +...          file=__import__('sys').stderr, flush=True)
    +
    +
    +

    From the processing state dictionary, you can retrieve information using the following keys:

    + + + + + + + + + + + + + + + + + + + + + + +
    Keys in the processing state dictionary

    Parameter Key

    Value Type

    Description

    “currentComponent”

    int

    Current processing step

    “totalComponents”

    int

    Total number of processing steps

    “completedComponents”

    str

    Name of the component currently being processed

    +

    Import MCT and configure DebugConfig with the callback function you created. +Configure CoreConfig with this DebugConfig and use it.

    +
    >>> import model_compression_toolkit as mct
    +>>> debug_config = mct.core.DebugConfig(progress_info_callback=progress_info_callback)
    +>>> core_config = mct.core.CoreConfig(debug_config=debug_config)
    +
    +
    diff --git a/docs/api/api_docs/modules/exporter.html b/docs/api/api_docs/modules/exporter.html index 435f7315a..2399c6b1b 100644 --- a/docs/api/api_docs/modules/exporter.html +++ b/docs/api/api_docs/modules/exporter.html @@ -7,7 +7,7 @@ exporter Module — MCT Documentation: ver 2.6.0 - + @@ -78,11 +78,8 @@

    keras_export_model -
    Return type:
    -

    Dict[str, type]

    -
    -
    Parameters:
    -
    @@ -186,17 +186,17 @@

    Pytorch Tutorial
    import model_compression_toolkit as mct
    -import numpy as np
    -import torch
    -from torchvision.models.mobilenetv2 import mobilenet_v2
    +
    import model_compression_toolkit as mct
    +import numpy as np
    +import torch
    +from torchvision.models.mobilenetv2 import mobilenet_v2
     
     # Create a model
     float_model = mobilenet_v2()
     
     
     # Notice that here the representative dataset is random for demonstration only.
    -def representative_data_gen():
    +def representative_data_gen():
         yield [np.random.random((1, 3, 224, 224))]
     
     
    @@ -254,8 +254,8 @@ 

    ONNX model output names

    Use exported model for inference

To load and infer using the exported model, which was exported to an ONNX file in MCTQ format, we will use mct_quantizers method get_ort_session_options during onnxruntime session creation. Notice, inference on models that are exported in this format is slower and suffers from longer latency. However, inference of these models on IMX500 will not suffer from this issue.

    -
    import mct_quantizers as mctq
    -import onnxruntime as ort
    +
    import mct_quantizers as mctq
    +import onnxruntime as ort
     
     sess = ort.InferenceSession(onnx_file_path,
                                 mctq.get_ort_session_options(),
    diff --git a/docs/api/api_docs/modules/layer_filters.html b/docs/api/api_docs/modules/layer_filters.html
    index 941a9ebaa..da4e14b6e 100644
    --- a/docs/api/api_docs/modules/layer_filters.html
    +++ b/docs/api/api_docs/modules/layer_filters.html
    @@ -7,7 +7,7 @@
         
     
         Layer Attributes Filters — MCT Documentation: ver 2.6.0
    -    
    +    
         
         
         
    diff --git a/docs/api/api_docs/modules/network_editor.html b/docs/api/api_docs/modules/network_editor.html
    index 54006edb3..56cd07d0d 100644
    --- a/docs/api/api_docs/modules/network_editor.html
    +++ b/docs/api/api_docs/modules/network_editor.html
    @@ -7,7 +7,7 @@
         
     
         network_editor Module — MCT Documentation: ver 2.6.0
    -    
    +    
         
         
         
    @@ -50,9 +50,9 @@ 

    EditRule and the action is applied on these nodes during the quantization process.

    Examples

    Create an EditRule to quantize all Conv2D kernel attribute weights using 9 bits:

    -
    >>> import model_compression_toolkit as mct
    ->>> from model_compression_toolkit.core.keras.constants import KERNEL
    ->>> from tensorflow.keras.layers import Conv2D
    +
    >>> import model_compression_toolkit as mct
    +>>> from model_compression_toolkit.core.keras.constants import KERNEL
    +>>> from tensorflow.keras.layers import Conv2D
     >>> er_list = [mct.core.network_editor.EditRule(filter=mct.core.network_editor.NodeTypeFilter(Conv2D), action=mct.core.network_editor.ChangeCandidatesWeightsQuantConfigAttr(attr_name=KERNEL, weights_n_bits=9))]
     
    diff --git a/docs/api/api_docs/modules/qat_config.html b/docs/api/api_docs/modules/qat_config.html index da879fe16..a6ad8b503 100644 --- a/docs/api/api_docs/modules/qat_config.html +++ b/docs/api/api_docs/modules/qat_config.html @@ -7,7 +7,7 @@ qat_config Module — MCT Documentation: ver 2.6.0 - + diff --git a/docs/api/api_docs/modules/target_platform_capabilities.html b/docs/api/api_docs/modules/target_platform_capabilities.html index d6b785708..76f1524bb 100644 --- a/docs/api/api_docs/modules/target_platform_capabilities.html +++ b/docs/api/api_docs/modules/target_platform_capabilities.html @@ -7,7 +7,7 @@ target_platform_capabilities Module — MCT Documentation: ver 2.6.0 - + diff --git a/docs/api/api_docs/modules/trainable_infrastructure.html b/docs/api/api_docs/modules/trainable_infrastructure.html index 52e173b98..15de91ad3 100644 --- a/docs/api/api_docs/modules/trainable_infrastructure.html +++ b/docs/api/api_docs/modules/trainable_infrastructure.html @@ -7,7 +7,7 @@ trainable_infrastructure Module — MCT Documentation: ver 2.6.0 - + @@ -128,8 +128,8 @@

    TrainableQuantizerWeightsConfig
    from model_compression_toolkit.target_platform_capabilities.target_platform_capabilities import QuantizationMethod
    -from model_compression_toolkit.constants import THRESHOLD, MIN_THRESHOLD
    +
    from model_compression_toolkit.target_platform_capabilities.target_platform_capabilities import QuantizationMethod
    +from model_compression_toolkit.constants import THRESHOLD, MIN_THRESHOLD
     
     TrainableQuantizerWeightsConfig(weights_quantization_method=QuantizationMethod.SYMMETRIC,
                                                weights_n_bits=8,
    @@ -165,8 +165,8 @@ 

    TrainableQuantizerActivationConfig
    from model_compression_toolkit.target_platform_capabilities.target_platform_capabilities import QuantizationMethod
    -from model_compression_toolkit.constants import THRESHOLD, MIN_THRESHOLD
    +
    from model_compression_toolkit.target_platform_capabilities.target_platform_capabilities import QuantizationMethod
    +from model_compression_toolkit.constants import THRESHOLD, MIN_THRESHOLD
     
     TrainableQuantizerActivationConfig(activation_quantization_method=QuantizationMethod.UNIFORM,
                                                   activation_n_bits=8,
    diff --git a/docs/api/api_docs/notes/tpc_note.html b/docs/api/api_docs/notes/tpc_note.html
    index a93c45d53..885ac18cf 100644
    --- a/docs/api/api_docs/notes/tpc_note.html
    +++ b/docs/api/api_docs/notes/tpc_note.html
    @@ -7,7 +7,7 @@
         
     
         <no title> — MCT Documentation: ver 2.6.0
    -    
    +    
         
         
         
    diff --git a/docs/docs_troubleshoot/genindex.html b/docs/docs_troubleshoot/genindex.html
    index e2f12927f..1350a4150 100644
    --- a/docs/docs_troubleshoot/genindex.html
    +++ b/docs/docs_troubleshoot/genindex.html
    @@ -6,7 +6,7 @@
         
         
         Index — TroubleShooting Documentation (MCT XQuant Extension Tool): ver 1.0
    -    
    +    
         
         
         
    diff --git a/docs/docs_troubleshoot/index.html b/docs/docs_troubleshoot/index.html
    index 73d959f8c..0ddb16140 100644
    --- a/docs/docs_troubleshoot/index.html
    +++ b/docs/docs_troubleshoot/index.html
    @@ -7,7 +7,7 @@
         
     
         TroubleShooting Manual (MCT XQuant Extension Tool) — TroubleShooting Documentation (MCT XQuant Extension Tool): ver 1.0
    -    
    +    
         
         
         
    diff --git a/docs/docs_troubleshoot/search.html b/docs/docs_troubleshoot/search.html
    index 7cda3fba8..16ece6516 100644
    --- a/docs/docs_troubleshoot/search.html
    +++ b/docs/docs_troubleshoot/search.html
    @@ -6,7 +6,7 @@
         
         
         Search — TroubleShooting Documentation (MCT XQuant Extension Tool): ver 1.0
    -    
    +    
         
         
         
    diff --git a/docs/docs_troubleshoot/static/pygments.css b/docs/docs_troubleshoot/static/pygments.css
    index 5f2b0a250..0d49244ed 100644
    --- a/docs/docs_troubleshoot/static/pygments.css
    +++ b/docs/docs_troubleshoot/static/pygments.css
    @@ -6,26 +6,26 @@ span.linenos.special { color: #000000; background-color: #ffffc0; padding-left:
     .highlight .hll { background-color: #ffffcc }
     .highlight { background: #eeffcc; }
     .highlight .c { color: #408090; font-style: italic } /* Comment */
    -.highlight .err { border: 1px solid #F00 } /* Error */
    +.highlight .err { border: 1px solid #FF0000 } /* Error */
     .highlight .k { color: #007020; font-weight: bold } /* Keyword */
    -.highlight .o { color: #666 } /* Operator */
    +.highlight .o { color: #666666 } /* Operator */
     .highlight .ch { color: #408090; font-style: italic } /* Comment.Hashbang */
     .highlight .cm { color: #408090; font-style: italic } /* Comment.Multiline */
     .highlight .cp { color: #007020 } /* Comment.Preproc */
     .highlight .cpf { color: #408090; font-style: italic } /* Comment.PreprocFile */
     .highlight .c1 { color: #408090; font-style: italic } /* Comment.Single */
    -.highlight .cs { color: #408090; background-color: #FFF0F0 } /* Comment.Special */
    +.highlight .cs { color: #408090; background-color: #fff0f0 } /* Comment.Special */
     .highlight .gd { color: #A00000 } /* Generic.Deleted */
     .highlight .ge { font-style: italic } /* Generic.Emph */
     .highlight .ges { font-weight: bold; font-style: italic } /* Generic.EmphStrong */
    -.highlight .gr { color: #F00 } /* Generic.Error */
    +.highlight .gr { color: #FF0000 } /* Generic.Error */
     .highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */
     .highlight .gi { color: #00A000 } /* Generic.Inserted */
    -.highlight .go { color: #333 } /* Generic.Output */
    -.highlight .gp { color: #C65D09; font-weight: bold } /* Generic.Prompt */
    +.highlight .go { color: #333333 } /* Generic.Output */
    +.highlight .gp { color: #c65d09; font-weight: bold } /* Generic.Prompt */
     .highlight .gs { font-weight: bold } /* Generic.Strong */
     .highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */
    -.highlight .gt { color: #04D } /* Generic.Traceback */
    +.highlight .gt { color: #0044DD } /* Generic.Traceback */
     .highlight .kc { color: #007020; font-weight: bold } /* Keyword.Constant */
     .highlight .kd { color: #007020; font-weight: bold } /* Keyword.Declaration */
     .highlight .kn { color: #007020; font-weight: bold } /* Keyword.Namespace */
    @@ -33,43 +33,43 @@ span.linenos.special { color: #000000; background-color: #ffffc0; padding-left:
     .highlight .kr { color: #007020; font-weight: bold } /* Keyword.Reserved */
     .highlight .kt { color: #902000 } /* Keyword.Type */
     .highlight .m { color: #208050 } /* Literal.Number */
    -.highlight .s { color: #4070A0 } /* Literal.String */
    -.highlight .na { color: #4070A0 } /* Name.Attribute */
    +.highlight .s { color: #4070a0 } /* Literal.String */
    +.highlight .na { color: #4070a0 } /* Name.Attribute */
     .highlight .nb { color: #007020 } /* Name.Builtin */
    -.highlight .nc { color: #0E84B5; font-weight: bold } /* Name.Class */
    -.highlight .no { color: #60ADD5 } /* Name.Constant */
    -.highlight .nd { color: #555; font-weight: bold } /* Name.Decorator */
    -.highlight .ni { color: #D55537; font-weight: bold } /* Name.Entity */
    +.highlight .nc { color: #0e84b5; font-weight: bold } /* Name.Class */
    +.highlight .no { color: #60add5 } /* Name.Constant */
    +.highlight .nd { color: #555555; font-weight: bold } /* Name.Decorator */
    +.highlight .ni { color: #d55537; font-weight: bold } /* Name.Entity */
     .highlight .ne { color: #007020 } /* Name.Exception */
    -.highlight .nf { color: #06287E } /* Name.Function */
    +.highlight .nf { color: #06287e } /* Name.Function */
     .highlight .nl { color: #002070; font-weight: bold } /* Name.Label */
    -.highlight .nn { color: #0E84B5; font-weight: bold } /* Name.Namespace */
    +.highlight .nn { color: #0e84b5; font-weight: bold } /* Name.Namespace */
     .highlight .nt { color: #062873; font-weight: bold } /* Name.Tag */
    -.highlight .nv { color: #BB60D5 } /* Name.Variable */
    +.highlight .nv { color: #bb60d5 } /* Name.Variable */
     .highlight .ow { color: #007020; font-weight: bold } /* Operator.Word */
    -.highlight .w { color: #BBB } /* Text.Whitespace */
    +.highlight .w { color: #bbbbbb } /* Text.Whitespace */
     .highlight .mb { color: #208050 } /* Literal.Number.Bin */
     .highlight .mf { color: #208050 } /* Literal.Number.Float */
     .highlight .mh { color: #208050 } /* Literal.Number.Hex */
     .highlight .mi { color: #208050 } /* Literal.Number.Integer */
     .highlight .mo { color: #208050 } /* Literal.Number.Oct */
    -.highlight .sa { color: #4070A0 } /* Literal.String.Affix */
    -.highlight .sb { color: #4070A0 } /* Literal.String.Backtick */
    -.highlight .sc { color: #4070A0 } /* Literal.String.Char */
    -.highlight .dl { color: #4070A0 } /* Literal.String.Delimiter */
    -.highlight .sd { color: #4070A0; font-style: italic } /* Literal.String.Doc */
    -.highlight .s2 { color: #4070A0 } /* Literal.String.Double */
    -.highlight .se { color: #4070A0; font-weight: bold } /* Literal.String.Escape */
    -.highlight .sh { color: #4070A0 } /* Literal.String.Heredoc */
    -.highlight .si { color: #70A0D0; font-style: italic } /* Literal.String.Interpol */
    -.highlight .sx { color: #C65D09 } /* Literal.String.Other */
    +.highlight .sa { color: #4070a0 } /* Literal.String.Affix */
    +.highlight .sb { color: #4070a0 } /* Literal.String.Backtick */
    +.highlight .sc { color: #4070a0 } /* Literal.String.Char */
    +.highlight .dl { color: #4070a0 } /* Literal.String.Delimiter */
    +.highlight .sd { color: #4070a0; font-style: italic } /* Literal.String.Doc */
    +.highlight .s2 { color: #4070a0 } /* Literal.String.Double */
    +.highlight .se { color: #4070a0; font-weight: bold } /* Literal.String.Escape */
    +.highlight .sh { color: #4070a0 } /* Literal.String.Heredoc */
    +.highlight .si { color: #70a0d0; font-style: italic } /* Literal.String.Interpol */
    +.highlight .sx { color: #c65d09 } /* Literal.String.Other */
     .highlight .sr { color: #235388 } /* Literal.String.Regex */
    -.highlight .s1 { color: #4070A0 } /* Literal.String.Single */
    +.highlight .s1 { color: #4070a0 } /* Literal.String.Single */
     .highlight .ss { color: #517918 } /* Literal.String.Symbol */
     .highlight .bp { color: #007020 } /* Name.Builtin.Pseudo */
    -.highlight .fm { color: #06287E } /* Name.Function.Magic */
    -.highlight .vc { color: #BB60D5 } /* Name.Variable.Class */
    -.highlight .vg { color: #BB60D5 } /* Name.Variable.Global */
    -.highlight .vi { color: #BB60D5 } /* Name.Variable.Instance */
    -.highlight .vm { color: #BB60D5 } /* Name.Variable.Magic */
    +.highlight .fm { color: #06287e } /* Name.Function.Magic */
    +.highlight .vc { color: #bb60d5 } /* Name.Variable.Class */
    +.highlight .vg { color: #bb60d5 } /* Name.Variable.Global */
    +.highlight .vi { color: #bb60d5 } /* Name.Variable.Instance */
    +.highlight .vm { color: #bb60d5 } /* Name.Variable.Magic */
     .highlight .il { color: #208050 } /* Literal.Number.Integer.Long */
    \ No newline at end of file
    diff --git a/docs/docs_troubleshoot/troubleshoots/bias_correction.html b/docs/docs_troubleshoot/troubleshoots/bias_correction.html
    index 14ba9f6be..839a46043 100644
    --- a/docs/docs_troubleshoot/troubleshoots/bias_correction.html
    +++ b/docs/docs_troubleshoot/troubleshoots/bias_correction.html
    @@ -7,7 +7,7 @@
         
     
         Bias Correction — TroubleShooting Documentation (MCT XQuant Extension Tool): ver 1.0
    -    
    +    
         
         
         
    diff --git a/docs/docs_troubleshoot/troubleshoots/enabling_hessian-based_mixed_precision.html b/docs/docs_troubleshoot/troubleshoots/enabling_hessian-based_mixed_precision.html
    index a1aa13592..9e72dbe68 100644
    --- a/docs/docs_troubleshoot/troubleshoots/enabling_hessian-based_mixed_precision.html
    +++ b/docs/docs_troubleshoot/troubleshoots/enabling_hessian-based_mixed_precision.html
    @@ -7,7 +7,7 @@
         
     
         Enabling Hessian-based Mixed Precision — TroubleShooting Documentation (MCT XQuant Extension Tool): ver 1.0
    -    
    +    
         
         
         
    diff --git a/docs/docs_troubleshoot/troubleshoots/gptq-gradient_based_post_training_quantization.html b/docs/docs_troubleshoot/troubleshoots/gptq-gradient_based_post_training_quantization.html
    index acd49af61..263064897 100644
    --- a/docs/docs_troubleshoot/troubleshoots/gptq-gradient_based_post_training_quantization.html
    +++ b/docs/docs_troubleshoot/troubleshoots/gptq-gradient_based_post_training_quantization.html
    @@ -7,7 +7,7 @@
         
     
         GPTQ - Gradient-Based Post Training Quantization — TroubleShooting Documentation (MCT XQuant Extension Tool): ver 1.0
    -    
    +    
         
         
         
    diff --git a/docs/docs_troubleshoot/troubleshoots/mixed_precision_with_model_output_loss_objective.html b/docs/docs_troubleshoot/troubleshoots/mixed_precision_with_model_output_loss_objective.html
    index ad0957cbb..7286296f5 100644
    --- a/docs/docs_troubleshoot/troubleshoots/mixed_precision_with_model_output_loss_objective.html
    +++ b/docs/docs_troubleshoot/troubleshoots/mixed_precision_with_model_output_loss_objective.html
    @@ -7,7 +7,7 @@
         
     
         Mixed Precision with model output loss objective — TroubleShooting Documentation (MCT XQuant Extension Tool): ver 1.0
    -    
    +    
         
         
         
    @@ -62,7 +62,7 @@ 

    Solution

    MCT offers an API to adjust the Mixed Precision objective method (MpDistanceWeighting).

    Set the distance_weighting_method attribute to MpDistanceWeighting.LAST_LAYER in the MixedPrecisionQuantizationConfig of the CoreConfig.

    By emphasizing a loss function that places greater importance on enhancing the model’s quantized output, users can mitigate the risk of detrimental precision reductions in the last layer.

    -
    from model_compression_toolkit.core.common.mixed_precision import MpDistanceWeighting
    +
    from model_compression_toolkit.core.common.mixed_precision import MpDistanceWeighting
     
     mixed_precision_config = mct.core.MixedPrecisionQuantizationConfig(distance_weighting_method=MpDistanceWeighting.LAST_LAYER)
     core_config = mct.core.CoreConfig(mixed_precision_config=mixed_precision_config)
    diff --git a/docs/docs_troubleshoot/troubleshoots/outlier_removal.html b/docs/docs_troubleshoot/troubleshoots/outlier_removal.html
    index fd731cd2d..ef4b50923 100644
    --- a/docs/docs_troubleshoot/troubleshoots/outlier_removal.html
    +++ b/docs/docs_troubleshoot/troubleshoots/outlier_removal.html
    @@ -7,7 +7,7 @@
         
     
         Outlier Removal — TroubleShooting Documentation (MCT XQuant Extension Tool): ver 1.0
    -    
    +    
         
         
         
    diff --git a/docs/docs_troubleshoot/troubleshoots/representative_and_validation_dataset_mismatch.html b/docs/docs_troubleshoot/troubleshoots/representative_and_validation_dataset_mismatch.html
    index bdab2b0fb..0efd370fd 100644
    --- a/docs/docs_troubleshoot/troubleshoots/representative_and_validation_dataset_mismatch.html
    +++ b/docs/docs_troubleshoot/troubleshoots/representative_and_validation_dataset_mismatch.html
    @@ -7,7 +7,7 @@
         
     
         Representative and Validation Dataset Mismatch — TroubleShooting Documentation (MCT XQuant Extension Tool): ver 1.0
    -    
    +    
         
         
         
    diff --git a/docs/docs_troubleshoot/troubleshoots/representative_dataset_size_and_diversity.html b/docs/docs_troubleshoot/troubleshoots/representative_dataset_size_and_diversity.html
    index ee4a881f8..f901696c8 100644
    --- a/docs/docs_troubleshoot/troubleshoots/representative_dataset_size_and_diversity.html
    +++ b/docs/docs_troubleshoot/troubleshoots/representative_dataset_size_and_diversity.html
    @@ -7,7 +7,7 @@
         
     
         Representative Dataset size and diversity — TroubleShooting Documentation (MCT XQuant Extension Tool): ver 1.0
    -    
    +    
         
         
         
    diff --git a/docs/docs_troubleshoot/troubleshoots/shift_negative_activation.html b/docs/docs_troubleshoot/troubleshoots/shift_negative_activation.html
    index 7d7a2338b..fd5fd1d39 100644
    --- a/docs/docs_troubleshoot/troubleshoots/shift_negative_activation.html
    +++ b/docs/docs_troubleshoot/troubleshoots/shift_negative_activation.html
    @@ -7,7 +7,7 @@
         
     
         Shift Negative Activation — TroubleShooting Documentation (MCT XQuant Extension Tool): ver 1.0
    -    
    +    
         
         
         
    diff --git a/docs/docs_troubleshoot/troubleshoots/threhold_selection_error_method.html b/docs/docs_troubleshoot/troubleshoots/threhold_selection_error_method.html
    index 8b3059191..2b2fb42df 100644
    --- a/docs/docs_troubleshoot/troubleshoots/threhold_selection_error_method.html
    +++ b/docs/docs_troubleshoot/troubleshoots/threhold_selection_error_method.html
    @@ -7,7 +7,7 @@
         
     
         Threshold selection error method — TroubleShooting Documentation (MCT XQuant Extension Tool): ver 1.0
    -    
    +    
         
         
         
    diff --git a/docs/docs_troubleshoot/troubleshoots/unbalanced_concatenation.html b/docs/docs_troubleshoot/troubleshoots/unbalanced_concatenation.html
    index c95c95fa6..7f38ead4d 100644
    --- a/docs/docs_troubleshoot/troubleshoots/unbalanced_concatenation.html
    +++ b/docs/docs_troubleshoot/troubleshoots/unbalanced_concatenation.html
    @@ -7,7 +7,7 @@
         
     
         Unbalanced “concatenation” — TroubleShooting Documentation (MCT XQuant Extension Tool): ver 1.0
    -    
    +    
         
         
         
    diff --git a/docs/docs_troubleshoot/troubleshoots/using_more_samples_in_mixed_precision_quantization.html b/docs/docs_troubleshoot/troubleshoots/using_more_samples_in_mixed_precision_quantization.html
    index 50db9dad6..e1a640647 100644
    --- a/docs/docs_troubleshoot/troubleshoots/using_more_samples_in_mixed_precision_quantization.html
    +++ b/docs/docs_troubleshoot/troubleshoots/using_more_samples_in_mixed_precision_quantization.html
    @@ -7,7 +7,7 @@
         
     
         Using more samples in Mixed Precision quantization — TroubleShooting Documentation (MCT XQuant Extension Tool): ver 1.0
    -    
    +    
         
         
         
    diff --git a/docs/genindex.html b/docs/genindex.html
    index e1f138f60..4b0eddcc2 100644
    --- a/docs/genindex.html
    +++ b/docs/genindex.html
    @@ -6,7 +6,7 @@
         
         
         Index — MCT Documentation: ver 2.6.0
    -    
    +    
         
         
         
    diff --git a/docs/guidelines/XQuant_Extension_Tool.html b/docs/guidelines/XQuant_Extension_Tool.html
    index 84606063a..5970df91c 100644
    --- a/docs/guidelines/XQuant_Extension_Tool.html
    +++ b/docs/guidelines/XQuant_Extension_Tool.html
    @@ -7,7 +7,7 @@
         
     
         XQuant Extension Tool — MCT Documentation: ver 2.6.0
    -    
    +    
         
         
         
    @@ -86,7 +86,7 @@ 

    How to Runthe XQuant tutorial with xquant_report_troubleshoot_pytorch_experimental.

    -
    from model_compression_toolkit.xquant import xquant_report_troubleshoot_pytorch_experimental
    +
    from model_compression_toolkit.xquant import xquant_report_troubleshoot_pytorch_experimental
     # xquant_report_pytorch_experimental --> xquant_report_troubleshoot_pytorch_experimental
     result = xquant_report_troubleshoot_pytorch_experimental(
                 float_model,
    @@ -111,7 +111,7 @@ 

    How to Runxquant_config = XQuantConfig(report_dir='./log_tensorboard_xquant') -from model_compression_toolkit.xquant import xquant_report_troubleshoot_pytorch_experimental +from model_compression_toolkit.xquant import xquant_report_troubleshoot_pytorch_experimental result = xquant_report_troubleshoot_pytorch_experimental( float_model, quantized_model, @@ -208,7 +208,7 @@

    Understanding the Judgeable Troubleshoots
  • Outlier Removal

  • -
    WARNING:Model Compression Toolkit:There are output values that deviate significantly from the average. Refer to the following images and the TroubleShooting Documentation (MCT XQuant Extension Tool) of 'Outlier Removal'.
    +
    WARNING:Model Compression Toolkit:There are output values that deviate significantly from the average. Refer to the following images and the TroubleShooting Documentation (MCT XQuant Extension Tool) of 'Outlier Removal'.
     
      diff --git a/docs/guidelines/visualization.html b/docs/guidelines/visualization.html index 5783d82f7..e713d300b 100644 --- a/docs/guidelines/visualization.html +++ b/docs/guidelines/visualization.html @@ -7,7 +7,7 @@ Visualization within TensorBoard — MCT Documentation: ver 2.6.0 - + @@ -50,7 +50,7 @@

      Navigation

      Visualization within TensorBoard

      One may log various graphs and data collected in different phases of the model quantization and display them within the Tensorboard UI. To use it, all you have to do is to set a logger path. Setting a path is done by calling set_log_folder.

      -
      import model_compression_toolkit as mct
      +
      import model_compression_toolkit as mct
       mct.set_log_folder('/logger/dir/path')
       
      diff --git a/docs/index.html b/docs/index.html index 2ec291518..0a5b860a4 100644 --- a/docs/index.html +++ b/docs/index.html @@ -7,7 +7,7 @@ Model Compression Toolkit User Guide — MCT Documentation: ver 2.6.0 - + diff --git a/docs/search.html b/docs/search.html index 740d162e7..37d780dc0 100644 --- a/docs/search.html +++ b/docs/search.html @@ -6,7 +6,7 @@ Search — MCT Documentation: ver 2.6.0 - + diff --git a/docs/searchindex.js b/docs/searchindex.js index 0bba14246..dac0e8f35 100644 --- a/docs/searchindex.js +++ b/docs/searchindex.js @@ -1 +1 @@ -Search.setIndex({"alltitles": {"API Docs": [[13, null]], "API Documentation": [[50, "api-documentation"]], "About XQuant Extension Tool": [[48, "about-xquant-extension-tool"]], "Actions": [[43, "actions"]], "Attribute Filters": [[42, "attribute-filters"]], "AttributeQuantizationConfig": [[45, "attributequantizationconfig"]], "BNLayerWeightingType": [[1, "bnlayerweightingtype"]], "BaseKerasTrainableQuantizer": [[46, "basekerastrainablequantizer"]], "BasePytorchTrainableQuantizer": [[46, "basepytorchtrainablequantizer"]], "BatchNormAlignemntLossType": [[1, "batchnormalignemntlosstype"]], "BitWidthConfig": [[0, null]], "ChannelAxis": [[3, "channelaxis"]], "ChannelsFilteringStrategy": [[6, "channelsfilteringstrategy"]], "CoreConfig": [[39, null]], "Cosine Similarity Comparison": [[49, "cosine-similarity-comparison"]], "Data Generation Configuration": [[1, null]], "DataInitType": [[1, "datainittype"]], "DebugConfig": [[40, "debugconfig"]], "DefaultDict Class": [[2, null]], "EditRule": [[43, "editrule"]], "Enable a Logger": [[35, null]], "Filters": [[43, "filters"]], "FrameworkInfo Class": [[3, null]], "Fusing": [[45, "fusing"]], "GPTQHessianScoresConfig Class": [[4, "gptqhessianscoresconfig-class"]], "Get DataGenerationConfig for Keras Models": [[14, null]], "Get DataGenerationConfig for Pytorch Models": [[16, null]], "Get GradientPTQConfig for Keras Models": [[15, null]], "Get GradientPTQConfig for Pytorch Models": 
[[17, null]], "Get Resource Utilization information for Keras Models": [[22, null]], "Get Resource Utilization information for PyTorch Models": [[30, null]], "Get TargetPlatformCapabilities for sdsp converter version": [[19, null]], "Get TargetPlatformCapabilities for tpc version": [[18, null]], "GradientPTQConfig Class": [[4, null]], "GradualActivationQuantizationConfig": [[4, "gradualactivationquantizationconfig"]], "How to Run": [[48, "how-to-run"]], "ImageGranularity": [[1, "imagegranularity"]], "ImageNormalizationType": [[1, "imagenormalizationtype"]], "ImagePipelineType": [[1, "imagepipelinetype"]], "ImportanceMetric": [[6, "importancemetric"]], "Indices and tables": [[13, "indices-and-tables"]], "Install": [[50, "install"]], "Keras Data Generation": [[20, null]], "Keras Gradient Based Post Training Quantization": [[21, null]], "Keras Post Training Quantization": [[24, null]], "Keras Quantization Aware Training Model Finalize": [[26, null]], "Keras Quantization Aware Training Model Init": [[27, null]], "Keras Structured Pruning": [[25, null]], "Keras Tutorial": [[41, "keras-tutorial"]], "KerasExportSerializationFormat": [[41, "kerasexportserializationformat"]], "Layer Attributes Filters": [[42, null]], "Load Quantized Keras Model": [[23, null]], "MCTQ": [[41, "mctq"]], "MCTQ Quantization Format": [[41, "mctq-quantization-format"]], "ManualBitWidthSelection": [[0, "manualbitwidthselection"]], "Mixed-precision Configuration Bit-width": [[49, "mixed-precision-configuration-bit-width"]], "MixedPrecisionQuantizationConfig": [[5, null]], "Model Compression Toolkit User Guide": [[50, null]], "MpDistanceWeighting": [[5, "mpdistanceweighting"]], "MpMetricNormalization": [[5, "mpmetricnormalization"]], "ONNX": [[41, "onnx"]], "ONNX model output names": [[41, "onnx-model-output-names"]], "ONNX opset version": [[41, "onnx-opset-version"]], "OpQuantizationConfig": [[45, "opquantizationconfig"]], "OperatorSetGroup": [[45, "operatorsetgroup"]], "OperatorsSet": [[45, 
"operatorsset"]], "OutputLossType": [[1, "outputlosstype"]], "Overall Process Flow": [[48, "overall-process-flow"]], "Overview": [[50, "overview"]], "Pruning Configuration": [[6, null]], "Pruning Information": [[7, null]], "PyTorch Quantization Aware Training Model Finalize": [[33, null]], "PyTorch Quantization Aware Training Model Init": [[34, null]], "Pytorch Data Generation": [[28, null]], "Pytorch Gradient Based Post Training Quantization": [[29, null]], "Pytorch Post Training Quantization": [[31, null]], "Pytorch Structured Pruning": [[32, null]], "Pytorch Tutorial": [[41, "pytorch-tutorial"]], "PytorchExportSerializationFormat": [[41, "pytorchexportserializationformat"]], "QATConfig": [[44, "qatconfig"]], "QFractionLinearAnnealingConfig": [[4, "qfractionlinearannealingconfig"]], "QuantizationConfig": [[8, null]], "QuantizationConfigOptions": [[45, "quantizationconfigoptions"]], "QuantizationErrorMethod": [[9, null]], "QuantizationFormat": [[41, "quantizationformat"]], "QuantizationMethod": [[45, "quantizationmethod"]], "Quickstart": [[50, "quickstart"]], "References": [[50, "references"]], "ResourceUtilization": [[10, null]], "RoundingType": [[4, "roundingtype"]], "SchedulerType": [[1, "schedulertype"]], "Supported Features": [[50, "supported-features"]], "TargetPlatformCapabilities": [[45, "targetplatformcapabilities"]], "Technical Constraints": [[50, "technical-constraints"]], "TrainableQuantizerActivationConfig": [[46, "trainablequantizeractivationconfig"]], "TrainableQuantizerWeightsConfig": [[46, "trainablequantizerweightsconfig"]], "TrainingMethod": [[44, "trainingmethod"], [46, "trainingmethod"]], "Understanding the General Troubleshoots": [[48, "understanding-the-general-troubleshoots"]], "Understanding the Judgeable Troubleshoots": [[48, "understanding-the-judgeable-troubleshoots"]], "Understanding the Quantization Error Graph": [[48, "understanding-the-quantization-error-graph"]], "Use exported model for inference": [[41, 
"use-exported-model-for-inference"]], "Visualization within TensorBoard": [[49, null]], "XQuant Configuration": [[12, null]], "XQuant Extension Tool": [[48, null]], "XQuant Report Keras": [[36, null]], "XQuant Report Pytorch": [[37, null]], "XQuant Report Troubleshoot Pytorch": [[38, null]], "XQuantConfig Format and Examples": [[48, "xquantconfig-format-and-examples"]], "XQuantConfig parameter": [[48, "id3"]], "core": [[13, "core"]], "data_generation": [[13, "data-generation"]], "debug_config Module": [[40, null]], "exporter": [[13, "exporter"]], "exporter Module": [[41, null]], "gptq": [[13, "gptq"]], "keras serialization format": [[41, "keras-serialization-format"]], "keras_export_model": [[41, "keras-export-model"]], "keras_load_quantized_model": [[13, "keras-load-quantized-model"]], "network_editor Module": [[43, null]], "pruning": [[13, "pruning"]], "ptq": [[13, "ptq"]], "pytorch_export_model": [[41, "pytorch-export-model"]], "qat": [[13, "qat"]], "qat_config Module": [[44, null]], "set_log_folder": [[13, "set-log-folder"]], "target_platform_capabilities": [[13, "target-platform-capabilities"]], "target_platform_capabilities Module": [[45, null]], "trainable_infrastructure": [[13, "trainable-infrastructure"]], "trainable_infrastructure Module": [[46, null]], "wrapper": [[11, null], [13, "wrapper"]], "xquant": [[13, "xquant"]]}, "docnames": ["api/api_docs/classes/BitWidthConfig", "api/api_docs/classes/DataGenerationConfig", "api/api_docs/classes/DefaultDict", "api/api_docs/classes/FrameworkInfo", "api/api_docs/classes/GradientPTQConfig", "api/api_docs/classes/MixedPrecisionQuantizationConfig", "api/api_docs/classes/PruningConfig", "api/api_docs/classes/PruningInfo", "api/api_docs/classes/QuantizationConfig", "api/api_docs/classes/QuantizationErrorMethod", "api/api_docs/classes/ResourceUtilization", "api/api_docs/classes/Wrapper", "api/api_docs/classes/XQuantConfig", "api/api_docs/index", "api/api_docs/methods/get_keras_data_generation_config", 
"api/api_docs/methods/get_keras_gptq_config", "api/api_docs/methods/get_pytorch_data_generation_config", "api/api_docs/methods/get_pytroch_gptq_config", "api/api_docs/methods/get_target_platform_capabilities", "api/api_docs/methods/get_target_platform_capabilities_sdsp", "api/api_docs/methods/keras_data_generation_experimental", "api/api_docs/methods/keras_gradient_post_training_quantization", "api/api_docs/methods/keras_kpi_data", "api/api_docs/methods/keras_load_quantizad_model", "api/api_docs/methods/keras_post_training_quantization", "api/api_docs/methods/keras_pruning_experimental", "api/api_docs/methods/keras_quantization_aware_training_finalize_experimental", "api/api_docs/methods/keras_quantization_aware_training_init_experimental", "api/api_docs/methods/pytorch_data_generation_experimental", "api/api_docs/methods/pytorch_gradient_post_training_quantization", "api/api_docs/methods/pytorch_kpi_data", "api/api_docs/methods/pytorch_post_training_quantization", "api/api_docs/methods/pytorch_pruning_experimental", "api/api_docs/methods/pytorch_quantization_aware_training_finalize_experimental", "api/api_docs/methods/pytorch_quantization_aware_training_init_experimental", "api/api_docs/methods/set_logger_path", "api/api_docs/methods/xquant_report_keras_experimental", "api/api_docs/methods/xquant_report_pytorch_experimental", "api/api_docs/methods/xquant_report_troubleshoot_pytorch_experimental", "api/api_docs/modules/core_config", "api/api_docs/modules/debug_config", "api/api_docs/modules/exporter", "api/api_docs/modules/layer_filters", "api/api_docs/modules/network_editor", "api/api_docs/modules/qat_config", "api/api_docs/modules/target_platform_capabilities", "api/api_docs/modules/trainable_infrastructure", "api/api_docs/notes/tpc_note", "guidelines/XQuant_Extension_Tool", "guidelines/visualization", "index"], "envversion": {"sphinx": 64, "sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, 
"sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2}, "filenames": ["api/api_docs/classes/BitWidthConfig.rst", "api/api_docs/classes/DataGenerationConfig.rst", "api/api_docs/classes/DefaultDict.rst", "api/api_docs/classes/FrameworkInfo.rst", "api/api_docs/classes/GradientPTQConfig.rst", "api/api_docs/classes/MixedPrecisionQuantizationConfig.rst", "api/api_docs/classes/PruningConfig.rst", "api/api_docs/classes/PruningInfo.rst", "api/api_docs/classes/QuantizationConfig.rst", "api/api_docs/classes/QuantizationErrorMethod.rst", "api/api_docs/classes/ResourceUtilization.rst", "api/api_docs/classes/Wrapper.rst", "api/api_docs/classes/XQuantConfig.rst", "api/api_docs/index.rst", "api/api_docs/methods/get_keras_data_generation_config.rst", "api/api_docs/methods/get_keras_gptq_config.rst", "api/api_docs/methods/get_pytorch_data_generation_config.rst", "api/api_docs/methods/get_pytroch_gptq_config.rst", "api/api_docs/methods/get_target_platform_capabilities.rst", "api/api_docs/methods/get_target_platform_capabilities_sdsp.rst", "api/api_docs/methods/keras_data_generation_experimental.rst", "api/api_docs/methods/keras_gradient_post_training_quantization.rst", "api/api_docs/methods/keras_kpi_data.rst", "api/api_docs/methods/keras_load_quantizad_model.rst", "api/api_docs/methods/keras_post_training_quantization.rst", "api/api_docs/methods/keras_pruning_experimental.rst", "api/api_docs/methods/keras_quantization_aware_training_finalize_experimental.rst", "api/api_docs/methods/keras_quantization_aware_training_init_experimental.rst", "api/api_docs/methods/pytorch_data_generation_experimental.rst", "api/api_docs/methods/pytorch_gradient_post_training_quantization.rst", "api/api_docs/methods/pytorch_kpi_data.rst", "api/api_docs/methods/pytorch_post_training_quantization.rst", "api/api_docs/methods/pytorch_pruning_experimental.rst", 
"api/api_docs/methods/pytorch_quantization_aware_training_finalize_experimental.rst", "api/api_docs/methods/pytorch_quantization_aware_training_init_experimental.rst", "api/api_docs/methods/set_logger_path.rst", "api/api_docs/methods/xquant_report_keras_experimental.rst", "api/api_docs/methods/xquant_report_pytorch_experimental.rst", "api/api_docs/methods/xquant_report_troubleshoot_pytorch_experimental.rst", "api/api_docs/modules/core_config.rst", "api/api_docs/modules/debug_config.rst", "api/api_docs/modules/exporter.rst", "api/api_docs/modules/layer_filters.rst", "api/api_docs/modules/network_editor.rst", "api/api_docs/modules/qat_config.rst", "api/api_docs/modules/target_platform_capabilities.rst", "api/api_docs/modules/trainable_infrastructure.rst", "api/api_docs/notes/tpc_note.rst", "guidelines/XQuant_Extension_Tool.rst", "guidelines/visualization.rst", "index.rst"], "indexentries": {"add_metadata (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.targetplatformcapabilities attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.TargetPlatformCapabilities.add_metadata", false]], "attributefilter (class in model_compression_toolkit.target_platform_capabilities)": [[42, "model_compression_toolkit.target_platform_capabilities.AttributeFilter", false]], "attributequantizationconfig (class in model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.AttributeQuantizationConfig", false]], "base_config (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.quantizationconfigoptions attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.QuantizationConfigOptions.base_config", false]], "basekerastrainablequantizer (class in model_compression_toolkit.trainable_infrastructure)": [[46, 
"model_compression_toolkit.trainable_infrastructure.BaseKerasTrainableQuantizer", false]], "basepytorchtrainablequantizer (class in model_compression_toolkit.trainable_infrastructure)": [[46, "model_compression_toolkit.trainable_infrastructure.BasePytorchTrainableQuantizer", false]], "batchnormalignemntlosstype (class in model_compression_toolkit.data_generation)": [[1, "model_compression_toolkit.data_generation.BatchNormAlignemntLossType", false]], "bit_width (model_compression_toolkit.core.common.quantization.bit_width_config.manualbitwidthselection attribute)": [[0, "model_compression_toolkit.core.common.quantization.bit_width_config.ManualBitWidthSelection.bit_width", false]], "bitwidthconfig (class in model_compression_toolkit.core)": [[0, "model_compression_toolkit.core.BitWidthConfig", false]], "bnlayerweightingtype (class in model_compression_toolkit.data_generation)": [[1, "model_compression_toolkit.data_generation.BNLayerWeightingType", false]], "changecandidatesactivationquantconfigattr (class in model_compression_toolkit.core.network_editor)": [[43, "model_compression_toolkit.core.network_editor.ChangeCandidatesActivationQuantConfigAttr", false]], "changecandidatesactivationquantizationmethod (class in model_compression_toolkit.core.network_editor)": [[43, "model_compression_toolkit.core.network_editor.ChangeCandidatesActivationQuantizationMethod", false]], "changecandidatesweightsquantconfigattr (class in model_compression_toolkit.core.network_editor)": [[43, "model_compression_toolkit.core.network_editor.ChangeCandidatesWeightsQuantConfigAttr", false]], "changecandidatesweightsquantizationmethod (class in model_compression_toolkit.core.network_editor)": [[43, "model_compression_toolkit.core.network_editor.ChangeCandidatesWeightsQuantizationMethod", false]], "changefinalactivationquantconfigattr (class in model_compression_toolkit.core.network_editor)": [[43, "model_compression_toolkit.core.network_editor.ChangeFinalActivationQuantConfigAttr", false]], 
"changefinalweightsquantconfigattr (class in model_compression_toolkit.core.network_editor)": [[43, "model_compression_toolkit.core.network_editor.ChangeFinalWeightsQuantConfigAttr", false]], "changefinalweightsquantizationmethod (class in model_compression_toolkit.core.network_editor)": [[43, "model_compression_toolkit.core.network_editor.ChangeFinalWeightsQuantizationMethod", false]], "changequantizationparamfunction (class in model_compression_toolkit.core.network_editor)": [[43, "model_compression_toolkit.core.network_editor.ChangeQuantizationParamFunction", false]], "channelaxis (class in model_compression_toolkit.core)": [[3, "model_compression_toolkit.core.ChannelAxis", false]], "channels_filtering_strategy (model_compression_toolkit.pruning.pruningconfig attribute)": [[6, "model_compression_toolkit.pruning.PruningConfig.channels_filtering_strategy", false]], "channelsfilteringstrategy (class in model_compression_toolkit.pruning)": [[6, "model_compression_toolkit.pruning.ChannelsFilteringStrategy", false]], "coreconfig (class in model_compression_toolkit.core)": [[39, "model_compression_toolkit.core.CoreConfig", false]], "datagenerationconfig (class in model_compression_toolkit.data_generation)": [[1, "model_compression_toolkit.data_generation.DataGenerationConfig", false]], "datainittype (class in model_compression_toolkit.data_generation)": [[1, "model_compression_toolkit.data_generation.DataInitType", false]], "debugconfig (class in model_compression_toolkit.core)": [[40, "model_compression_toolkit.core.DebugConfig", false]], "default_qco (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.targetplatformcapabilities attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.TargetPlatformCapabilities.default_qco", false]], "defaultdict (class in model_compression_toolkit)": [[2, "model_compression_toolkit.DefaultDict", false]], "editrule (class in 
model_compression_toolkit.core.network_editor)": [[43, "model_compression_toolkit.core.network_editor.EditRule", false]], "enable_weights_quantization (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.attributequantizationconfig attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.AttributeQuantizationConfig.enable_weights_quantization", false]], "eq (class in model_compression_toolkit.target_platform_capabilities)": [[42, "model_compression_toolkit.target_platform_capabilities.Eq", false]], "filter (model_compression_toolkit.core.common.quantization.bit_width_config.manualbitwidthselection attribute)": [[0, "model_compression_toolkit.core.common.quantization.bit_width_config.ManualBitWidthSelection.filter", false]], "frameworkinfo (class in model_compression_toolkit.core)": [[3, "model_compression_toolkit.core.FrameworkInfo", false]], "fuse_op_quantization_config (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.fusing attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.Fusing.fuse_op_quantization_config", false]], "fusing (class in model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.Fusing", false]], "fusing_patterns (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.targetplatformcapabilities attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.TargetPlatformCapabilities.fusing_patterns", false]], "get() (model_compression_toolkit.defaultdict method)": [[2, "model_compression_toolkit.DefaultDict.get", false]], "get_keras_data_generation_config() (in module model_compression_toolkit.data_generation)": [[14, "model_compression_toolkit.data_generation.get_keras_data_generation_config", false]], 
"get_keras_gptq_config() (in module model_compression_toolkit.gptq)": [[15, "model_compression_toolkit.gptq.get_keras_gptq_config", false]], "get_pytorch_data_generation_config() (in module model_compression_toolkit.data_generation)": [[16, "model_compression_toolkit.data_generation.get_pytorch_data_generation_config", false]], "get_pytorch_gptq_config() (in module model_compression_toolkit.gptq)": [[17, "model_compression_toolkit.gptq.get_pytorch_gptq_config", false]], "get_target_platform_capabilities() (in module model_compression_toolkit)": [[18, "model_compression_toolkit.get_target_platform_capabilities", false]], "get_target_platform_capabilities_sdsp() (in module model_compression_toolkit)": [[19, "model_compression_toolkit.get_target_platform_capabilities_sdsp", false]], "gptqhessianscoresconfig (class in model_compression_toolkit.gptq)": [[4, "model_compression_toolkit.gptq.GPTQHessianScoresConfig", false]], "gradientptqconfig (class in model_compression_toolkit.gptq)": [[4, "model_compression_toolkit.gptq.GradientPTQConfig", false]], "gradualactivationquantizationconfig (class in model_compression_toolkit.gptq)": [[4, "model_compression_toolkit.gptq.GradualActivationQuantizationConfig", false]], "greater (class in model_compression_toolkit.target_platform_capabilities)": [[42, "model_compression_toolkit.target_platform_capabilities.Greater", false]], "greatereq (class in model_compression_toolkit.target_platform_capabilities)": [[42, "model_compression_toolkit.target_platform_capabilities.GreaterEq", false]], "imagegranularity (class in model_compression_toolkit.data_generation)": [[1, "model_compression_toolkit.data_generation.ImageGranularity", false]], "imagenormalizationtype (class in model_compression_toolkit.data_generation)": [[1, "model_compression_toolkit.data_generation.ImageNormalizationType", false]], "imagepipelinetype (class in model_compression_toolkit.data_generation)": [[1, "model_compression_toolkit.data_generation.ImagePipelineType", 
false]], "importance_metric (model_compression_toolkit.pruning.pruningconfig attribute)": [[6, "model_compression_toolkit.pruning.PruningConfig.importance_metric", false]], "importance_scores (model_compression_toolkit.pruning.pruninginfo property)": [[7, "model_compression_toolkit.pruning.PruningInfo.importance_scores", false]], "importancemetric (class in model_compression_toolkit.pruning)": [[6, "model_compression_toolkit.pruning.ImportanceMetric", false]], "insert_preserving_quantizers (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.targetplatformcapabilities attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.TargetPlatformCapabilities.insert_preserving_quantizers", false]], "is_simd_padding (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.targetplatformcapabilities attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.TargetPlatformCapabilities.is_simd_padding", false]], "keras_data_generation_experimental() (in module model_compression_toolkit.data_generation)": [[20, "model_compression_toolkit.data_generation.keras_data_generation_experimental", false]], "keras_export_model (class in model_compression_toolkit.exporter)": [[41, "model_compression_toolkit.exporter.keras_export_model", false]], "keras_gradient_post_training_quantization() (in module model_compression_toolkit.gptq)": [[21, "model_compression_toolkit.gptq.keras_gradient_post_training_quantization", false]], "keras_load_quantized_model() (in module model_compression_toolkit)": [[23, "model_compression_toolkit.keras_load_quantized_model", false]], "keras_post_training_quantization() (in module model_compression_toolkit.ptq)": [[24, "model_compression_toolkit.ptq.keras_post_training_quantization", false]], "keras_pruning_experimental() (in module model_compression_toolkit.pruning)": [[25, 
"model_compression_toolkit.pruning.keras_pruning_experimental", false]], "keras_quantization_aware_training_finalize_experimental() (in module model_compression_toolkit.qat)": [[26, "model_compression_toolkit.qat.keras_quantization_aware_training_finalize_experimental", false]], "keras_quantization_aware_training_init_experimental() (in module model_compression_toolkit.qat)": [[27, "model_compression_toolkit.qat.keras_quantization_aware_training_init_experimental", false]], "keras_resource_utilization_data() (in module model_compression_toolkit.core)": [[22, "model_compression_toolkit.core.keras_resource_utilization_data", false]], "kerasexportserializationformat (class in model_compression_toolkit.exporter)": [[41, "model_compression_toolkit.exporter.KerasExportSerializationFormat", false]], "keys() (model_compression_toolkit.defaultdict method)": [[2, "model_compression_toolkit.DefaultDict.keys", false]], "lut_values_bitwidth (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.attributequantizationconfig attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.AttributeQuantizationConfig.lut_values_bitwidth", false]], "manual_activation_bit_width_selection_list (model_compression_toolkit.core.bitwidthconfig attribute)": [[0, "model_compression_toolkit.core.BitWidthConfig.manual_activation_bit_width_selection_list", false]], "manual_weights_bit_width_selection_list (model_compression_toolkit.core.bitwidthconfig attribute)": [[0, "model_compression_toolkit.core.BitWidthConfig.manual_weights_bit_width_selection_list", false]], "manualbitwidthselection (class in model_compression_toolkit.core.common.quantization.bit_width_config)": [[0, "model_compression_toolkit.core.common.quantization.bit_width_config.ManualBitWidthSelection", false]], "mctwrapper (class in model_compression_toolkit.wrapper.mct_wrapper)": [[11, "model_compression_toolkit.wrapper.mct_wrapper.MCTWrapper", false]], 
"mixedprecisionquantizationconfig (class in model_compression_toolkit.core)": [[5, "model_compression_toolkit.core.MixedPrecisionQuantizationConfig", false]], "mpdistanceweighting (class in model_compression_toolkit.core)": [[5, "model_compression_toolkit.core.MpDistanceWeighting", false]], "mpmetricnormalization (class in model_compression_toolkit.core)": [[5, "model_compression_toolkit.core.MpMetricNormalization", false]], "name (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.fusing attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.Fusing.name", false]], "name (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.operatorsetgroup attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.OperatorSetGroup.name", false]], "name (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.operatorsset attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.OperatorsSet.name", false]], "name (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.targetplatformcapabilities attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.TargetPlatformCapabilities.name", false]], "nodenamefilter (class in model_compression_toolkit.core.network_editor)": [[43, "model_compression_toolkit.core.network_editor.NodeNameFilter", false]], "nodenamescopefilter (class in model_compression_toolkit.core.network_editor)": [[43, "model_compression_toolkit.core.network_editor.NodeNameScopeFilter", false]], "nodetypefilter (class in model_compression_toolkit.core.network_editor)": [[43, "model_compression_toolkit.core.network_editor.NodeTypeFilter", false]], "noteq (class in model_compression_toolkit.target_platform_capabilities)": [[42, "model_compression_toolkit.target_platform_capabilities.NotEq", 
false]], "num_score_approximations (model_compression_toolkit.pruning.pruningconfig attribute)": [[6, "model_compression_toolkit.pruning.PruningConfig.num_score_approximations", false]], "operator_groups (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.fusing attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.Fusing.operator_groups", false]], "operator_set (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.targetplatformcapabilities attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.TargetPlatformCapabilities.operator_set", false]], "operators_set (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.operatorsetgroup attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.OperatorSetGroup.operators_set", false]], "operatorsetgroup (class in model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.OperatorSetGroup", false]], "operatorsset (class in model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.OperatorsSet", false]], "opquantizationconfig (class in model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.OpQuantizationConfig", false]], "outputlosstype (class in model_compression_toolkit.data_generation)": [[1, "model_compression_toolkit.data_generation.OutputLossType", false]], "pruning_masks (model_compression_toolkit.pruning.pruninginfo property)": [[7, "model_compression_toolkit.pruning.PruningInfo.pruning_masks", false]], "pruningconfig (class in 
model_compression_toolkit.pruning)": [[6, "model_compression_toolkit.pruning.PruningConfig", false]], "pruninginfo (class in model_compression_toolkit.pruning)": [[7, "model_compression_toolkit.pruning.PruningInfo", false]], "pytorch_data_generation_experimental() (in module model_compression_toolkit.data_generation)": [[28, "model_compression_toolkit.data_generation.pytorch_data_generation_experimental", false]], "pytorch_export_model (class in model_compression_toolkit.exporter)": [[41, "model_compression_toolkit.exporter.pytorch_export_model", false]], "pytorch_gradient_post_training_quantization() (in module model_compression_toolkit.gptq)": [[29, "model_compression_toolkit.gptq.pytorch_gradient_post_training_quantization", false]], "pytorch_post_training_quantization() (in module model_compression_toolkit.ptq)": [[31, "model_compression_toolkit.ptq.pytorch_post_training_quantization", false]], "pytorch_pruning_experimental() (in module model_compression_toolkit.pruning)": [[32, "model_compression_toolkit.pruning.pytorch_pruning_experimental", false]], "pytorch_quantization_aware_training_finalize_experimental() (in module model_compression_toolkit.qat)": [[33, "model_compression_toolkit.qat.pytorch_quantization_aware_training_finalize_experimental", false]], "pytorch_quantization_aware_training_init_experimental() (in module model_compression_toolkit.qat)": [[34, "model_compression_toolkit.qat.pytorch_quantization_aware_training_init_experimental", false]], "pytorch_resource_utilization_data() (in module model_compression_toolkit.core)": [[30, "model_compression_toolkit.core.pytorch_resource_utilization_data", false]], "pytorchexportserializationformat (class in model_compression_toolkit.exporter)": [[41, "model_compression_toolkit.exporter.PytorchExportSerializationFormat", false]], "qatconfig (class in model_compression_toolkit.qat)": [[44, "model_compression_toolkit.qat.QATConfig", false]], "qc_options 
(model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.operatorsset attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.OperatorsSet.qc_options", false]], "qfractionlinearannealingconfig (class in model_compression_toolkit.gptq)": [[4, "model_compression_toolkit.gptq.QFractionLinearAnnealingConfig", false]], "quantization_configurations (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.quantizationconfigoptions attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.QuantizationConfigOptions.quantization_configurations", false]], "quantizationconfig (class in model_compression_toolkit.core)": [[8, "model_compression_toolkit.core.QuantizationConfig", false]], "quantizationconfigoptions (class in model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.QuantizationConfigOptions", false]], "quantizationerrormethod (class in model_compression_toolkit.core)": [[9, "model_compression_toolkit.core.QuantizationErrorMethod", false]], "quantizationformat (class in model_compression_toolkit.exporter)": [[41, "model_compression_toolkit.exporter.QuantizationFormat", false]], "quantizationmethod (class in model_compression_toolkit.target_platform_capabilities)": [[45, "model_compression_toolkit.target_platform_capabilities.QuantizationMethod", false]], "quantize_and_export() (model_compression_toolkit.wrapper.mct_wrapper.mctwrapper method)": [[11, "model_compression_toolkit.wrapper.mct_wrapper.MCTWrapper.quantize_and_export", false]], "resourceutilization (class in model_compression_toolkit.core)": [[10, "model_compression_toolkit.core.ResourceUtilization", false]], "roundingtype (class in model_compression_toolkit.gptq)": [[4, "model_compression_toolkit.gptq.RoundingType", false]], "schedulertype (class in 
model_compression_toolkit.data_generation)": [[1, "model_compression_toolkit.data_generation.SchedulerType", false]], "schema_version (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.targetplatformcapabilities attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.TargetPlatformCapabilities.SCHEMA_VERSION", false]], "set_log_folder() (in module model_compression_toolkit)": [[35, "model_compression_toolkit.set_log_folder", false]], "smaller (class in model_compression_toolkit.target_platform_capabilities)": [[42, "model_compression_toolkit.target_platform_capabilities.Smaller", false]], "smallereq (class in model_compression_toolkit.target_platform_capabilities)": [[42, "model_compression_toolkit.target_platform_capabilities.SmallerEq", false]], "targetplatformcapabilities (class in model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.TargetPlatformCapabilities", false]], "tpc_minor_version (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.targetplatformcapabilities attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.TargetPlatformCapabilities.tpc_minor_version", false]], "tpc_patch_version (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.targetplatformcapabilities attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.TargetPlatformCapabilities.tpc_patch_version", false]], "tpc_platform_type (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.targetplatformcapabilities attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.TargetPlatformCapabilities.tpc_platform_type", false]], "trainablequantizeractivationconfig (class in 
model_compression_toolkit.trainable_infrastructure)": [[46, "model_compression_toolkit.trainable_infrastructure.TrainableQuantizerActivationConfig", false]], "trainablequantizerweightsconfig (class in model_compression_toolkit.trainable_infrastructure)": [[46, "model_compression_toolkit.trainable_infrastructure.TrainableQuantizerWeightsConfig", false]], "trainingmethod (class in model_compression_toolkit.trainable_infrastructure)": [[46, "model_compression_toolkit.trainable_infrastructure.TrainingMethod", false]], "type (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.operatorsset attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.OperatorsSet.type", false]], "weights_n_bits (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.attributequantizationconfig attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.AttributeQuantizationConfig.weights_n_bits", false]], "weights_per_channel_threshold (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.attributequantizationconfig attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.AttributeQuantizationConfig.weights_per_channel_threshold", false]], "weights_quantization_method (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.attributequantizationconfig attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.AttributeQuantizationConfig.weights_quantization_method", false]], "xquant_report_keras_experimental() (in module model_compression_toolkit.xquant.keras.facade_xquant_report)": [[36, "model_compression_toolkit.xquant.keras.facade_xquant_report.xquant_report_keras_experimental", false]], "xquant_report_pytorch_experimental() (in module model_compression_toolkit.xquant.pytorch.facade_xquant_report)": [[37, 
"model_compression_toolkit.xquant.pytorch.facade_xquant_report.xquant_report_pytorch_experimental", false]], "xquant_report_troubleshoot_pytorch_experimental() (in module model_compression_toolkit.xquant.pytorch.facade_xquant_report)": [[38, "model_compression_toolkit.xquant.pytorch.facade_xquant_report.xquant_report_troubleshoot_pytorch_experimental", false]], "xquantconfig (class in model_compression_toolkit.xquant.common.xquant_config)": [[12, "model_compression_toolkit.xquant.common.xquant_config.XQuantConfig", false]]}, "objects": {"model_compression_toolkit": [[2, 0, 1, "", "DefaultDict"], [18, 3, 1, "", "get_target_platform_capabilities"], [19, 3, 1, "", "get_target_platform_capabilities_sdsp"], [23, 3, 1, "", "keras_load_quantized_model"], [35, 3, 1, "", "set_log_folder"]], "model_compression_toolkit.DefaultDict": [[2, 1, 1, "", "get"], [2, 1, 1, "", "keys"]], "model_compression_toolkit.core": [[0, 0, 1, "", "BitWidthConfig"], [3, 0, 1, "", "ChannelAxis"], [39, 0, 1, "", "CoreConfig"], [40, 0, 1, "", "DebugConfig"], [3, 0, 1, "", "FrameworkInfo"], [5, 0, 1, "", "MixedPrecisionQuantizationConfig"], [5, 0, 1, "", "MpDistanceWeighting"], [5, 0, 1, "", "MpMetricNormalization"], [8, 0, 1, "", "QuantizationConfig"], [9, 0, 1, "", "QuantizationErrorMethod"], [10, 0, 1, "", "ResourceUtilization"], [22, 3, 1, "", "keras_resource_utilization_data"], [30, 3, 1, "", "pytorch_resource_utilization_data"]], "model_compression_toolkit.core.BitWidthConfig": [[0, 2, 1, "", "manual_activation_bit_width_selection_list"], [0, 2, 1, "", "manual_weights_bit_width_selection_list"]], "model_compression_toolkit.core.common.quantization.bit_width_config": [[0, 0, 1, "", "ManualBitWidthSelection"]], "model_compression_toolkit.core.common.quantization.bit_width_config.ManualBitWidthSelection": [[0, 2, 1, "", "bit_width"], [0, 2, 1, "", "filter"]], "model_compression_toolkit.core.network_editor": [[43, 0, 1, "", "ChangeCandidatesActivationQuantConfigAttr"], [43, 0, 1, "", 
"ChangeCandidatesActivationQuantizationMethod"], [43, 0, 1, "", "ChangeCandidatesWeightsQuantConfigAttr"], [43, 0, 1, "", "ChangeCandidatesWeightsQuantizationMethod"], [43, 0, 1, "", "ChangeFinalActivationQuantConfigAttr"], [43, 0, 1, "", "ChangeFinalWeightsQuantConfigAttr"], [43, 0, 1, "", "ChangeFinalWeightsQuantizationMethod"], [43, 0, 1, "", "ChangeQuantizationParamFunction"], [43, 0, 1, "", "EditRule"], [43, 0, 1, "", "NodeNameFilter"], [43, 0, 1, "", "NodeNameScopeFilter"], [43, 0, 1, "", "NodeTypeFilter"]], "model_compression_toolkit.data_generation": [[1, 0, 1, "", "BNLayerWeightingType"], [1, 0, 1, "", "BatchNormAlignemntLossType"], [1, 0, 1, "", "DataGenerationConfig"], [1, 0, 1, "", "DataInitType"], [1, 0, 1, "", "ImageGranularity"], [1, 0, 1, "", "ImageNormalizationType"], [1, 0, 1, "", "ImagePipelineType"], [1, 0, 1, "", "OutputLossType"], [1, 0, 1, "", "SchedulerType"], [14, 3, 1, "", "get_keras_data_generation_config"], [16, 3, 1, "", "get_pytorch_data_generation_config"], [20, 3, 1, "", "keras_data_generation_experimental"], [28, 3, 1, "", "pytorch_data_generation_experimental"]], "model_compression_toolkit.exporter": [[41, 0, 1, "", "KerasExportSerializationFormat"], [41, 0, 1, "", "PytorchExportSerializationFormat"], [41, 0, 1, "", "QuantizationFormat"], [41, 0, 1, "", "keras_export_model"], [41, 0, 1, "", "pytorch_export_model"]], "model_compression_toolkit.gptq": [[4, 0, 1, "", "GPTQHessianScoresConfig"], [4, 0, 1, "", "GradientPTQConfig"], [4, 0, 1, "", "GradualActivationQuantizationConfig"], [4, 0, 1, "", "QFractionLinearAnnealingConfig"], [4, 0, 1, "", "RoundingType"], [15, 3, 1, "", "get_keras_gptq_config"], [17, 3, 1, "", "get_pytorch_gptq_config"], [21, 3, 1, "", "keras_gradient_post_training_quantization"], [29, 3, 1, "", "pytorch_gradient_post_training_quantization"]], "model_compression_toolkit.pruning": [[6, 0, 1, "", "ChannelsFilteringStrategy"], [6, 0, 1, "", "ImportanceMetric"], [6, 0, 1, "", "PruningConfig"], [7, 0, 1, "", 
"PruningInfo"], [25, 3, 1, "", "keras_pruning_experimental"], [32, 3, 1, "", "pytorch_pruning_experimental"]], "model_compression_toolkit.pruning.PruningConfig": [[6, 2, 1, "", "channels_filtering_strategy"], [6, 2, 1, "", "importance_metric"], [6, 2, 1, "", "num_score_approximations"]], "model_compression_toolkit.pruning.PruningInfo": [[7, 4, 1, "", "importance_scores"], [7, 4, 1, "", "pruning_masks"]], "model_compression_toolkit.ptq": [[24, 3, 1, "", "keras_post_training_quantization"], [31, 3, 1, "", "pytorch_post_training_quantization"]], "model_compression_toolkit.qat": [[44, 0, 1, "", "QATConfig"], [26, 3, 1, "", "keras_quantization_aware_training_finalize_experimental"], [27, 3, 1, "", "keras_quantization_aware_training_init_experimental"], [33, 3, 1, "", "pytorch_quantization_aware_training_finalize_experimental"], [34, 3, 1, "", "pytorch_quantization_aware_training_init_experimental"]], "model_compression_toolkit.target_platform_capabilities": [[42, 0, 1, "", "AttributeFilter"], [42, 0, 1, "", "Eq"], [42, 0, 1, "", "Greater"], [42, 0, 1, "", "GreaterEq"], [42, 0, 1, "", "NotEq"], [45, 0, 1, "", "QuantizationMethod"], [42, 0, 1, "", "Smaller"], [42, 0, 1, "", "SmallerEq"]], "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema": [[45, 0, 1, "", "AttributeQuantizationConfig"], [45, 0, 1, "", "Fusing"], [45, 0, 1, "", "OpQuantizationConfig"], [45, 0, 1, "", "OperatorSetGroup"], [45, 0, 1, "", "OperatorsSet"], [45, 0, 1, "", "QuantizationConfigOptions"], [45, 0, 1, "", "TargetPlatformCapabilities"]], "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.AttributeQuantizationConfig": [[45, 2, 1, "", "enable_weights_quantization"], [45, 2, 1, "", "lut_values_bitwidth"], [45, 2, 1, "", "weights_n_bits"], [45, 2, 1, "", "weights_per_channel_threshold"], [45, 2, 1, "", "weights_quantization_method"]], "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.Fusing": [[45, 2, 1, "", 
"fuse_op_quantization_config"], [45, 2, 1, "", "name"], [45, 2, 1, "", "operator_groups"]], "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.OperatorSetGroup": [[45, 2, 1, "", "name"], [45, 2, 1, "", "operators_set"]], "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.OperatorsSet": [[45, 2, 1, "", "name"], [45, 2, 1, "", "qc_options"], [45, 2, 1, "", "type"]], "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.QuantizationConfigOptions": [[45, 2, 1, "", "base_config"], [45, 2, 1, "", "quantization_configurations"]], "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.TargetPlatformCapabilities": [[45, 2, 1, "", "SCHEMA_VERSION"], [45, 2, 1, "", "add_metadata"], [45, 2, 1, "", "default_qco"], [45, 2, 1, "", "fusing_patterns"], [45, 2, 1, "", "insert_preserving_quantizers"], [45, 2, 1, "", "is_simd_padding"], [45, 2, 1, "", "name"], [45, 2, 1, "", "operator_set"], [45, 2, 1, "", "tpc_minor_version"], [45, 2, 1, "", "tpc_patch_version"], [45, 2, 1, "", "tpc_platform_type"]], "model_compression_toolkit.trainable_infrastructure": [[46, 0, 1, "", "BaseKerasTrainableQuantizer"], [46, 0, 1, "", "BasePytorchTrainableQuantizer"], [46, 0, 1, "", "TrainableQuantizerActivationConfig"], [46, 0, 1, "", "TrainableQuantizerWeightsConfig"], [46, 0, 1, "", "TrainingMethod"]], "model_compression_toolkit.wrapper.mct_wrapper": [[11, 0, 1, "", "MCTWrapper"]], "model_compression_toolkit.wrapper.mct_wrapper.MCTWrapper": [[11, 1, 1, "", "quantize_and_export"]], "model_compression_toolkit.xquant.common.xquant_config": [[12, 0, 1, "", "XQuantConfig"]], "model_compression_toolkit.xquant.keras.facade_xquant_report": [[36, 3, 1, "", "xquant_report_keras_experimental"]], "model_compression_toolkit.xquant.pytorch.facade_xquant_report": [[37, 3, 1, "", "xquant_report_pytorch_experimental"], [38, 3, 1, "", "xquant_report_troubleshoot_pytorch_experimental"]]}, "objnames": 
{"0": ["py", "class", "Python class"], "1": ["py", "method", "Python method"], "2": ["py", "attribute", "Python attribute"], "3": ["py", "function", "Python function"], "4": ["py", "property", "Python property"]}, "objtypes": {"0": "py:class", "1": "py:method", "2": "py:attribute", "3": "py:function", "4": "py:property"}, "terms": {"": [3, 6, 8, 10, 21, 24, 25, 26, 27, 29, 31, 32, 34, 35, 41, 42, 43, 45, 46, 48, 50], "0": [1, 3, 4, 5, 7, 8, 11, 12, 14, 16, 21, 24, 25, 26, 27, 32, 41, 46, 48], "05": 8, "06": 5, "08153": 46, "1": [1, 3, 4, 5, 7, 8, 11, 12, 17, 20, 21, 22, 24, 25, 26, 28, 29, 30, 31, 32, 33, 41, 48, 50], "10": [20, 21, 24, 27, 28, 29, 31, 34], "10000000000": 5, "14": 11, "15": 41, "16": [12, 41, 48], "1902": 46, "1e": [5, 15, 17], "1st": 15, "2": [3, 8, 12, 15, 17, 20, 28, 45, 46, 48, 50], "20": 49, "2021": 50, "2023": 50, "224": [21, 22, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34, 41], "2nd": 15, "3": [3, 11, 15, 17, 20, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 41, 46], "32": [4, 5, 11], "3e": [15, 17], "3rd": 15, "4": [15, 17, 20, 21, 24, 25, 27, 28, 29, 31, 32, 34, 48], "4th": 15, "5": [11, 12, 15, 17, 25, 32, 48], "50": [25, 32], "52587890625e": 8, "6": 28, "75": [11, 21, 24, 26, 27], "8": [20, 21, 24, 26, 27, 28, 41, 45, 46], "9": 43, "A": [0, 3, 4, 5, 7, 8, 13, 15, 17, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34, 36, 37, 38, 39, 40, 43, 44, 45, 50], "And": 48, "As": [5, 48, 49], "By": [4, 5, 11, 25, 29, 31, 32, 41, 49], "For": [3, 8, 12, 18, 19, 20, 21, 24, 26, 27, 28, 34, 41, 45, 46, 47, 48, 49, 50], "If": [2, 3, 4, 5, 12, 15, 17, 21, 24, 26, 27, 29, 31, 39, 41, 42, 45, 48], "In": [5, 20, 21, 24, 27, 28, 29, 31, 34, 41, 42, 44, 48], "It": [2, 11, 12, 45, 46, 48], "No": 1, "One": 49, "The": [0, 1, 3, 4, 5, 6, 7, 10, 11, 12, 14, 15, 16, 17, 18, 19, 20, 21, 24, 25, 26, 27, 28, 29, 31, 32, 34, 36, 37, 38, 41, 43, 45, 46, 48, 49], "Then": [3, 21, 24, 27, 29, 31, 34, 43, 49], "There": [41, 48, 49], "These": [48, 49], "To": [41, 48, 49], 
"With": 48, "_": [21, 24, 27, 29, 31, 34, 41], "_input_data": 41, "_model_input_nam": 41, "_model_output_nam": 41, "_with_model_output_loss_object": 48, "about": [3, 4, 7, 13, 15, 17, 21, 24, 26, 27, 41, 45, 46], "abov": [12, 48], "absolut": 9, "abstract": [13, 46], "accept": [15, 45], "access": 7, "accord": [13, 21, 22, 24, 25, 27, 29, 30, 31, 32, 34, 41, 42], "accordingli": 45, "accuraci": [12, 48], "achiev": 25, "act": 7, "act_hessian_default_batch_s": [15, 17], "action": 40, "activ": [0, 3, 4, 5, 8, 10, 11, 21, 22, 24, 27, 29, 30, 31, 34, 41, 43, 44, 45, 46, 48, 49], "activation_bias_correct": 8, "activation_bias_correction_threshold": 8, "activation_channel_equ": 8, "activation_error_method": [8, 11], "activation_memori": 10, "activation_min_max_map": 3, "activation_n_bit": [45, 46], "activation_op": 3, "activation_quantization_candid": 46, "activation_quantization_method": [43, 45, 46], "activation_quantization_param": 46, "activation_quantization_params_fn": 43, "activation_quantizer_map": 3, "activation_quantizer_params_overrid": 44, "activation_training_method": 44, "ad": 45, "adam": [14, 15, 17], "add": [1, 3, 12, 14, 16, 23, 46], "add_metadata": 45, "addit": [23, 41, 48], "address": 45, "advanc": 3, "affect": [21, 24, 26, 27], "after": [13, 21, 23, 24, 27, 34, 48, 50], "aim": [25, 32], "algorithm": 5, "align": [1, 14, 16], "all": [1, 3, 4, 5, 8, 43, 46, 49], "allimag": [1, 16], "allow": [6, 12, 20, 28, 41, 45], "along": 49, "also": [25, 32, 45], "an": [1, 2, 3, 4, 7, 11, 13, 21, 24, 27, 34, 36, 37, 38, 41, 42, 43, 45, 46, 48, 50], "analysi": [25, 32], "analyz": [25, 32, 38], "analyze_similar": 40, "ani": [1, 2, 3, 5, 11, 36, 37, 38, 41, 42, 46], "anneal": 4, "api": [3, 4, 24, 27, 34, 44, 48], "appli": [0, 1, 5, 8, 13, 41, 42, 43, 45, 48], "applic": [21, 22, 24, 25, 26, 27, 41], "approach": 6, "appropri": 48, "approxim": [6, 25, 32], "ar": [3, 5, 12, 18, 19, 21, 24, 25, 27, 29, 31, 32, 34, 41, 45, 46, 47, 48, 49], "architectur": [25, 32], "argument": [4, 
41, 45], "arrai": [7, 11], "art": 50, "arxiv": [46, 50], "assess": [25, 32], "associ": [25, 32], "assum": [25, 32], "astyp": 41, "attent": [4, 15, 17, 46], "attirbut": 3, "attr": 42, "attr_nam": 43, "attr_valu": 43, "attr_weights_configs_map": 45, "attribut": [43, 45, 46], "attributefilt": 42, "auto": 13, "automat": 48, "auxiliari": [15, 17], "avail": 41, "averag": [1, 5, 14, 15, 16, 17, 48], "avg": 5, "awar": [13, 44, 46, 50], "axi": [3, 46, 48], "backend": 45, "base": [1, 4, 5, 8, 9, 11, 13, 15, 17, 18, 19, 20, 25, 28, 31, 32, 46, 48, 50], "base_config": 45, "basenod": 7, "basenodematch": 0, "basic": 46, "batch": [1, 4, 5, 14, 15, 16, 17, 20, 21, 24, 27, 28, 29, 31, 34], "batchnorm": [1, 14, 16, 20, 21, 24, 27, 29, 31, 34], "batchnorm2d": 28, "batchnormalignemntlosstyp": [14, 16], "batchwis": [1, 14], "been": 7, "begin": 4, "behavior": [40, 48], "being": [21, 24, 27, 29, 31, 34, 45, 46], "below": [12, 48], "between": [4, 5, 12, 21, 29, 31, 45, 48, 49], "bia": [4, 11, 15, 17, 21, 24, 26, 27], "bidwidth": 5, "bit": [0, 5, 10, 13, 21, 24, 26, 27, 34, 39, 41, 43, 45, 46, 50], "bit_width": 0, "bit_width_config": [0, 39], "bitwidth": [5, 12, 21, 24, 26, 27, 48], "bitwidthconfig": [13, 39], "block": [46, 49], "bn_alignment_loss_typ": [1, 14, 16], "bn_layer_typ": [1, 14, 16], "bnlayerweightingtyp": [14, 16], "bool": [1, 4, 5, 11, 12, 14, 15, 16, 17, 40, 45, 46], "boolean": 23, "bop": 10, "both": [11, 21, 24, 29, 31, 33, 46, 49], "build": [22, 30, 46, 50], "built": [27, 34, 46], "bypass": 40, "byte": [10, 21, 24, 25, 27, 32, 34, 49], "c": [12, 48], "calcul": [5, 6, 13, 21, 22, 24, 25, 27, 29, 30, 31, 32, 34, 48], "calibr": [11, 21, 22, 24, 27, 29, 30, 31, 34], "call": [22, 30, 35, 45, 49], "callabl": [3, 5, 11, 12, 15, 17, 21, 22, 24, 25, 27, 29, 30, 31, 32, 34, 36, 37, 38, 41, 42], "can": [3, 4, 8, 11, 13, 15, 17, 20, 22, 25, 28, 30, 32, 40, 41, 43, 45, 46, 48, 49, 50], "candid": [5, 21, 24, 26, 27, 43], "cannot": 45, "capabl": [11, 18, 19, 25, 30, 32], "case": 5, 
"caus": [12, 13, 38, 48], "chang": [20, 28, 41, 43, 48, 49], "changecandidatesactivationquantconfigattr": 43, "changecandidatesactivationquantizationmethod": 43, "changecandidatesweightsquantconfigattr": 43, "changecandidatesweightsquantizationmethod": 43, "changefinalactivationquantconfigattr": 43, "changefinalweightsquantconfigattr": 43, "changefinalweightsquantizationmethod": 43, "changequantizationmethod": 43, "changequantizationparamfunct": 43, "channel": [3, 6, 7, 13, 25, 32, 45, 46, 49], "channels_filtering_strategi": 6, "check": [5, 41, 42, 43], "choos": [1, 4, 41], "chosen": 49, "circl": 48, "class": [0, 1, 5, 6, 7, 8, 9, 10, 11, 12, 13, 23, 39, 40, 41, 42, 43, 44, 45, 46], "clibrat": 31, "click": 49, "clip": [1, 14, 16], "clone": 50, "coeffici": [3, 21, 24, 26, 27, 29, 31, 45, 46], "cohen": 50, "collaps": 11, "collect": [3, 21, 24, 27, 29, 31, 34, 36, 37, 38, 49], "com": 50, "combin": 45, "common": [0, 12], "compar": [5, 21, 29, 31, 48, 49], "comparison": 50, "compat": 41, "compil": 23, "complet": [4, 11], "compon": [45, 46, 48], "compress": [11, 13, 20, 25, 28, 29, 32, 48], "comput": [3, 4, 5, 9, 12, 13, 15, 17, 22, 30, 36, 40, 49], "compute_distance_fn": 5, "concat_threshold_upd": 8, "concaten": [12, 45, 48], "concatn": [12, 48], "config": [4, 20, 21, 24, 25, 26, 27, 28, 29, 32, 33, 34, 39, 43, 46], "configur": [0, 4, 5, 8, 10, 11, 13, 14, 15, 16, 17, 20, 21, 24, 25, 26, 27, 28, 29, 31, 32, 33, 34, 36, 37, 38, 39, 40, 42, 43, 44, 45, 46, 48, 50], "configuration_overwrit": 5, "confirm": 48, "connect": 11, "consid": [6, 14, 16, 25, 32, 45], "consol": 48, "constant": [6, 43, 46], "constraint": [21, 24, 25, 29, 31, 32], "contain": [7, 13, 20, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 36, 37, 38, 46, 48], "conv2d": [3, 20, 21, 24, 26, 27, 28, 43, 45], "conveni": 35, "convent": 48, "convert": [11, 13, 26, 33, 45], "core": [0, 3, 5, 8, 9, 10, 11, 21, 22, 24, 25, 26, 27, 29, 30, 32, 33, 34, 39, 40, 43], "core_config": [21, 22, 24, 26, 27, 29, 30, 31, 
33, 34], "coreconfig": [13, 21, 22, 24, 26, 27, 29, 30, 31, 33, 34], "correct": 11, "correspond": [7, 48], "cosin": [48, 50], "count_param": [21, 24, 25, 26, 27], "countermeasur": 48, "cpuexecutionprovid": 41, "creat": [3, 4, 8, 11, 13, 14, 15, 16, 17, 20, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 41, 42, 43, 45, 48], "creation": 41, "crop": 1, "cudaexecutionprovid": 41, "current": [4, 41], "custom": [5, 12, 20, 23, 27, 28, 41], "custom_metric_fn": 5, "custom_object": [23, 26, 27], "custom_similarity_metr": 12, "custom_tpc_opset_to_lay": 8, "cut": 40, "dash": 48, "data": [13, 14, 16, 22, 25, 30, 32, 36, 37, 38, 41, 45, 49, 50], "data_gen_batch_s": [1, 14, 16, 20, 28], "data_gener": [1, 14, 16, 20, 28], "data_generation_config": [20, 28], "data_init_typ": [1, 14, 16], "dataclass": [39, 40], "datagenerationconfig": [1, 13, 20, 28], "datainittyp": [14, 16], "dataset": [4, 11, 15, 17, 21, 22, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34, 36, 37, 38, 41, 48, 49], "debug": [39, 40], "debug_config": 39, "debugconfig": 39, "deeper": 49, "def": [21, 22, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34, 41], "default": [1, 2, 4, 5, 6, 11, 14, 15, 16, 17, 21, 24, 25, 29, 31, 32, 39, 41, 44, 45, 49], "default_data_gen_b": [14, 16], "default_factori": 2, "default_keras_extra_pixel": 14, "default_keras_initial_lr": 14, "default_keras_output_loss_multipli": 14, "default_keras_tpc": [21, 24, 25, 27], "default_n_it": [14, 16], "default_onnx_opset_vers": 41, "default_pytorch_bn_layer_typ": 16, "default_pytorch_extra_pixel": 16, "default_pytorch_initial_lr": 16, "default_pytorch_last_layer_typ": 16, "default_pytorch_output_loss_multipli": 16, "default_pytorch_tpc": [29, 31, 32, 34], "default_qco": 45, "default_valu": 2, "default_weight_attr_config": 45, "defaultdict": [3, 13], "defin": [0, 4, 5, 15, 17, 20, 21, 24, 25, 26, 27, 28, 29, 31, 32, 45, 46, 48], "degrad": [12, 13, 38, 48], "demonstr": [41, 45], "dens": [3, 20], "dense_nparam": [25, 32], "depend": [1, 21, 24, 27, 29, 31, 34], 
"describ": 48, "descript": 11, "desir": [13, 21, 22, 24, 26, 27, 29, 30, 31, 34], "detail": [41, 45, 48], "detect": [12, 13, 38, 48], "determin": [6, 25, 32, 45], "develop": 50, "deviat": 48, "devic": [13, 18], "device_typ": 18, "diagram": 45, "diamant": 50, "dict": [3, 7, 12, 36, 37, 38, 41, 45, 46, 48], "dictionari": [2, 3, 4, 12, 26, 27, 36, 37, 38, 41, 43, 44, 46], "differ": [1, 8, 13, 21, 24, 26, 27, 41, 45, 48, 49], "dikstein": 50, "dir": [12, 48, 49], "directori": [12, 13, 35, 48], "disabl": [15, 17], "displai": [48, 49], "distanc": [5, 11], "distance_weighting_method": [5, 11], "distil": [4, 50], "distribut": 9, "diverg": [9, 49], "divers": 1, "divid": 3, "divis": 49, "dnn": 46, "do": [1, 48, 49], "document": [13, 24, 27, 34, 48], "doe": 48, "doesn": 50, "don": 35, "done": 49, "dot": 49, "dqa": 46, "dror": 50, "dtype": 41, "dummi": 17, "durat": [25, 32], "dure": [4, 13, 14, 15, 16, 17, 18, 19, 36, 37, 38, 41, 43, 45, 46, 47, 49], "e": [3, 5, 11, 21, 24, 27, 29, 31, 34, 50], "each": [5, 6, 7, 12, 21, 24, 25, 27, 29, 31, 32, 34, 43, 45, 46, 48, 49], "easi": 48, "easili": [13, 50], "edit": [39, 40, 43], "editrul": 40, "either": 45, "element": [7, 45], "empti": 2, "emul": 46, "enabl": [1, 5, 8, 11, 13, 15, 17, 40, 46, 50], "enable_activation_quant": [45, 46], "enable_weights_quant": [45, 46], "encapsul": [0, 8], "end_step": 4, "engin": 50, "enhanc": 50, "ensur": 5, "entir": 13, "enum": [1, 3, 4, 6, 9, 46], "epoch": [4, 11, 15, 17], "epsilon": 5, "eptq": 50, "eq": 42, "equal": 42, "er_list": 43, "error": [9, 11, 12], "estim": [4, 46], "etc": [3, 10, 13, 21, 24, 27, 29, 31, 34, 49], "euclidean": 49, "evalu": [5, 36, 37, 38], "even": 48, "exact": 17, "exampl": [3, 8, 11, 15, 17, 20, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 43, 45, 46, 50], "exceed": 48, "execut": 48, "exist": [2, 43, 48], "exp": 5, "exp_distance_weighting_sigma": 5, "expect": [4, 49], "experiment": [13, 20, 28, 50], "explain": [12, 13, 36, 37, 38, 46], "explicitli": 45, "expon": 5, 
"exponenti": 5, "export": 11, "extend": [25, 32], "extens": [11, 41, 50], "extra": [1, 14, 16], "extra_pixel": [1, 14, 16], "extrem": 48, "facade_xquant_report": [36, 37, 38], "factor": [4, 5, 9, 15, 17], "factori": [0, 4, 39, 40], "fake": 41, "fake_qu": [27, 34], "fakely_qu": 41, "fallback": 45, "fals": [4, 5, 8, 11, 12, 14, 15, 17, 40, 46], "familiar": 48, "fetch": 45, "few": [49, 50], "field": [18, 19, 42, 45, 47], "figur": [40, 49], "file": [23, 26, 27, 35, 41], "filepath": 23, "filter": [0, 1, 6], "final": [4, 5, 12, 13, 20, 28, 43, 48, 49, 50], "find": [21, 24, 27, 34], "fine": [15, 17, 25, 26, 27, 32, 33, 34], "first": [1, 21, 24, 27, 29, 31, 34, 41, 49], "first_layer_multipli": 1, "fix": 45, "fixed_scal": [18, 19, 45, 47], "fixed_zero_point": [18, 19, 45, 47], "flag": [1, 11, 40, 45], "flatten": [20, 28], "flip": 1, "float": [1, 4, 5, 11, 12, 14, 15, 16, 17, 21, 27, 29, 31, 34, 36, 37, 38, 41, 45, 46, 48, 49], "float32": [25, 32, 41], "float_model": [11, 36, 37, 38, 41, 48], "fold": [21, 24, 27, 29, 31, 34], "folder": [35, 48], "follow": [3, 4, 11, 12, 46, 48, 49], "footprint": [25, 32], "form": 45, "format": [3, 13], "fraction": 4, "framework": [3, 11, 46], "frameworkquantizationcap": [22, 29, 30, 31], "free": [6, 20, 25, 28, 32, 50], "freez": 46, "freeze_quant_param": 46, "friendli": [25, 32, 50], "from": [3, 4, 11, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 41, 43, 45, 46, 47, 48, 49, 50], "from_config": 46, "function": [3, 4, 5, 11, 12, 13, 14, 15, 16, 17, 20, 23, 25, 28, 32, 35, 43, 45, 46, 48], "fuse_op_quantization_config": 45, "fusing_pattern": 45, "futur": [18, 19, 20, 28, 45, 47], "g": [3, 11, 21, 24, 27, 29, 31, 34], "gather": [45, 49], "gaussian": [1, 14, 16], "gener": [2, 12, 13, 14, 16, 21, 22, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34, 36, 37, 38, 45, 49, 50], "generated_imag": [20, 28], "get": [2, 3, 4, 5, 13, 21, 24, 26, 27, 29, 31, 33, 34, 45, 49], "get_config": 46, "get_input": 41, "get_keras_data_generation_config": 
[13, 14, 20], "get_keras_gptq_config": [11, 13, 15, 21], "get_ort_session_opt": 41, "get_output": 41, "get_pytorch_data_generation_config": [13, 16, 28], "get_pytorch_gptq_config": [11, 13, 17], "get_target_platform_cap": [13, 18, 45], "get_target_platform_capabilities_sdsp": [13, 19, 45], "git": 50, "github": [41, 50], "given": [2, 21, 22, 24, 27, 29, 30, 31, 34], "gordon": 50, "gptq": [4, 11, 15, 17, 21, 29], "gptq_conf": [15, 17, 29], "gptq_config": [21, 29, 31], "gptq_quantizer_params_overrid": 4, "gptq_representative_data_gen": [21, 29], "grad": 1, "gradient": [1, 4, 11, 13, 31, 50], "gradientptq": [4, 13], "gradientptqconfig": [13, 21, 29], "gradual": 4, "gradual_activation_quant": [15, 17], "gradual_activation_quantization_config": 4, "gradualactivationquant": [15, 17], "gradualactivationquantizationconfig": [15, 17], "granular": [1, 14, 16], "graph": [22, 30, 43, 49], "greater": 42, "greatereq": 42, "greedi": [5, 6], "group": [3, 6, 25, 32, 45], "h": 50, "ha": [7, 41, 42, 43], "habi": 50, "handl": [11, 21, 24, 27, 29, 31, 34], "handler": 35, "hardwar": [13, 25, 32, 45, 46, 50], "have": [3, 41, 42, 48, 49], "henc": 45, "here": [12, 25, 32, 41, 45, 48, 50], "hessian": [4, 5, 6, 9, 11, 15, 17, 25, 32, 50], "hessian_batch_s": [4, 5, 15, 17], "hessian_weights_config": 4, "hessians_num_sampl": 4, "higher": [25, 32], "highlight": 48, "hight": 28, "histogram": [21, 24, 27, 29, 31, 34, 49], "hmse": 9, "hold": [3, 39, 42, 45], "holder": 46, "how": [3, 6, 21, 22, 24, 27, 29, 31, 34, 41, 46, 50], "howev": 41, "hptq": [45, 50], "http": [46, 50], "hw": 22, "i": [1, 2, 3, 4, 5, 6, 7, 9, 11, 12, 13, 15, 17, 20, 21, 24, 25, 26, 27, 28, 29, 31, 32, 34, 35, 39, 40, 41, 42, 43, 45, 46, 48, 49, 50], "ident": [1, 5], "identifi": [25, 32, 45, 48], "ignor": [18, 19, 45, 47], "ilp": [21, 24, 27, 34], "imag": [1, 4, 5, 11, 14, 16, 20, 21, 24, 27, 28, 29, 31, 34, 48, 49], "image_clip": [1, 14, 16], "image_granular": [1, 14, 16], "image_normalization_typ": [1, 14, 16], 
"image_pipeline_typ": [1, 14, 16], "imagegranular": [14, 16], "imagenet": 1, "imagenet1k_v1": 32, "imagenormalizationtyp": [14, 16], "imagepipelinetyp": [14, 16], "imagewis": 1, "impact": [25, 32], "implement": [12, 46], "implment": 46, "import": [3, 6, 7, 8, 11, 13, 15, 17, 20, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 41, 43, 46, 48, 49], "importance_metr": 6, "importance_scor": 7, "improv": [5, 25, 32, 48], "imx500": [11, 41, 45], "imx500_tp_model": 18, "in_model": [21, 22, 24, 26, 27, 30, 33, 34], "in_modul": [31, 48], "includ": [4, 7, 11, 21, 24, 27, 29, 31, 34, 45, 46], "increas": [4, 5], "index": [3, 13], "indic": [3, 7, 25, 32, 45, 48], "individu": 48, "induc": 9, "inf": [8, 10, 11], "infer": [13, 26, 33, 45, 46], "inferablequant": [26, 33], "inferencesess": 41, "info": [6, 35], "inform": [3, 4, 13, 15, 17, 18, 19, 21, 24, 25, 27, 29, 31, 32, 34, 40, 45, 46, 47], "infrastructur": 46, "init": [13, 43, 50], "initi": [1, 2, 4, 6, 11, 12, 14, 16, 27, 34, 46, 48], "initial_lr": [1, 14, 16], "initial_q_fract": 4, "inner": 2, "input": [1, 5, 11, 14, 16, 21, 24, 27, 29, 31, 34, 40, 45, 48], "input_sc": 8, "input_shap": 20, "insert": 49, "insert_preserving_quant": 45, "instal": 41, "instanc": [4, 11, 13, 15, 17, 43, 45, 49], "instanti": [4, 8, 44], "instruct": 45, "insuffici": [12, 48], "int": [0, 1, 4, 5, 6, 12, 14, 15, 16, 17, 20, 28, 35, 41, 45, 46, 48], "int8": 41, "integ": [5, 41, 45], "interest": 5, "interfac": [4, 11, 17], "introduc": 46, "inverse_min_max_diff": 1, "involv": [20, 25, 28, 32], "is_detect_under_threshold_quantize_error": 12, "is_keras_layer_export": 41, "is_layer_exportable_fn": 41, "is_pytorch_layer_export": 41, "is_simd_pad": 45, "issu": [5, 41, 48], "item": 48, "iter": [1, 14, 16, 20, 21, 24, 27, 28, 29, 31, 34], "its": [2, 3, 11, 13, 23, 25, 32, 42, 45, 49], "jen": 50, "judg": [12, 13, 38, 48], "judgment": 48, "just": 50, "keep": [33, 50], "kei": [2, 11, 12, 25, 32, 42], "kept": [7, 27, 34], "ker": 27, "kera": [3, 11, 13, 43, 46, 
50], "keras_appl": [1, 14], "keras_data_generation_experiment": [13, 20], "keras_default_tpc": 22, "keras_file_path": 41, "keras_gradient_post_training_quant": [13, 15, 21], "keras_load_quantized_model": 23, "keras_post_training_quant": [13, 24, 41, 43, 49], "keras_pruning_experiment": [13, 25], "keras_quantization_aware_training_finalize_experiment": [13, 26], "keras_quantization_aware_training_init_experiment": [13, 26, 27], "keras_resource_utilization_data": [13, 22], "kernel": [3, 21, 24, 26, 27, 43, 46], "kernel_channels_map": 3, "kernel_op": 3, "kernel_ops_attributes_map": 3, "keyword": 45, "kl": [9, 49], "know": [3, 13], "knowledg": [4, 50], "known_dict": 2, "kwarg": 43, "l": [25, 50], "l2": 1, "l2_squar": [1, 14, 16], "l_p_valu": 8, "label": [6, 25, 32, 45, 50], "lambda": 41, "larg": [12, 48], "larger": 5, "last": [3, 4, 5, 48], "last_lay": 5, "last_layer_typ": [1, 16], "latenc": 41, "latest": 50, "launch": 49, "layaer": [13, 38], "layer": [1, 3, 5, 7, 11, 12, 14, 15, 16, 17, 20, 21, 24, 25, 26, 27, 29, 31, 32, 33, 34, 40, 41, 43, 45, 46, 48, 49], "layer_min_max_map": 3, "layer_weighting_typ": [1, 14, 16], "layerfilterparam": 42, "learn": [1, 14, 15, 16, 17, 46], "learnabl": 46, "least": 6, "left": 11, "let": 41, "level": 35, "lfh": [6, 25, 32], "librari": [3, 8], "like": [8, 45], "limit": [6, 21, 24, 26, 27, 29, 31, 34], "line": 48, "linear": [4, 11, 28], "linear_collaps": [8, 11], "linearli": 4, "link": 48, "list": [0, 1, 3, 5, 11, 14, 15, 16, 20, 28, 40, 41, 43, 50], "liter": 45, "ll": [20, 28], "load": [13, 26, 27, 41, 46], "load_model": [26, 27], "loadopt": 23, "log": [4, 12, 13, 15, 17, 35, 48, 49], "log_funct": [4, 15, 17], "log_norm": 4, "log_tensorboard_xqu": 48, "logdir": 49, "logger": [13, 40, 49], "longer": 41, "look": [24, 27, 34, 45, 50], "lookup": 45, "loss": [1, 4, 12, 14, 15, 16, 17, 21, 25, 29, 31, 32, 48], "low": 11, "lp": 9, "lsq": 46, "lut_pot_quant": 45, "lut_sym_quant": 45, "lut_values_bitwidth": 45, "mae": [9, 49], "mai": [20, 21, 
24, 27, 28, 29, 31, 34, 42, 49], "main": [11, 45, 48, 49], "make": 9, "manag": [0, 11], "mandatori": 41, "mani": 49, "manipul": [0, 1], "manner": 45, "manual": [0, 13, 39, 48], "manual_activation_bit_width_selection_list": 0, "manual_weights_bit_width_selection_list": 0, "manualweightsbitwidthselect": 0, "map": [3, 45], "mask": 7, "match": [18, 19, 42, 43], "mathemat": 49, "max": [1, 3, 5, 8, 9, 21, 22, 24, 27, 29, 30, 31, 34, 49], "maxbit": 5, "maxim": [21, 24, 27, 34], "mct": [3, 8, 11, 13, 15, 17, 18, 19, 20, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 39, 40, 41, 43, 45, 46, 47, 48, 49, 50], "mct_current_schema": 45, "mct_quantiz": 41, "mct_wrapper": 11, "mctwrapper": 11, "mean": [1, 4, 9, 49], "measur": [6, 10, 12, 48, 49], "meet": [25, 32], "memori": [10, 25, 32, 49], "messag": 48, "metadata": [7, 45], "method": [4, 5, 6, 9, 11, 13, 25, 32, 35, 41, 43, 44, 45, 46], "metric": [4, 5, 6, 12, 36, 37, 38, 48], "metric_epsilon": 5, "metric_norm": 5, "metric_normalization_threshold": 5, "min": [1, 3, 5, 8, 9, 21, 24, 27, 29, 31, 34, 49], "min_threshold": [8, 46], "minbit": 5, "minim": [5, 9, 21, 25, 29, 31, 32], "minimum": 46, "minor": 45, "minut": 50, "mix": [5, 10, 11, 12, 13, 21, 22, 24, 26, 27, 29, 30, 31, 34, 39, 45, 48, 50], "mixed_precis": 11, "mixed_precision_config": [21, 22, 24, 26, 27, 39], "mixedprecisionquantizationconfig": [11, 13, 21, 22, 24, 26, 27, 39], "mkstemp": 41, "mobilenet": [21, 22], "mobilenet_v2": [24, 26, 27, 29, 30, 31, 33, 34, 41], "mobilenetv2": [24, 26, 27, 41, 49], "model": [3, 4, 5, 7, 8, 10, 11, 12, 13, 18, 19, 20, 21, 24, 25, 28, 29, 31, 32, 36, 37, 38, 39, 40, 43, 44, 45, 46, 48, 49], "model_compression_toolkit": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 48, 49], "model_fil": [26, 27], "model_format_onnx_mctq": 41, "model_mp": 5, "model_output": 41, "modifi": [13, 43], "modul": [13, 28, 
29, 30, 31, 32, 37, 38], "more": [9, 18, 19, 24, 25, 27, 32, 34, 41, 45, 47, 48, 49], "most": 48, "mse": [8, 9, 11, 12, 48, 49], "multipl": [3, 5, 35, 45], "multiple_tensors_mse_loss": 4, "multipli": [1, 12, 14, 16, 48], "must": [25, 32, 45], "n_epoch": [4, 11, 15, 17, 21], "n_imag": [20, 28], "n_iter": [1, 14, 16, 20, 28], "nadam": 15, "name": [12, 43, 45, 48, 49], "nchw": 3, "ndarrai": 7, "necessari": [4, 11, 41, 46, 48], "need": [3, 11, 13, 21, 24, 27, 29, 31, 34, 41, 42, 46, 48], "neg": [1, 5, 48], "negative_min_max_diff": [1, 16], "network": [3, 6, 11, 33, 39, 40, 43, 49, 50], "network_editor": [13, 40], "netzer": 50, "neural": [6, 11, 50], "neuron": 7, "new": [43, 45], "next": [20, 28, 41, 42], "nhwc": 3, "nn": [28, 37, 38], "no_norm": 1, "no_quantization_op": 3, "noclip": [8, 9], "node": [0, 27, 34, 41, 43, 46, 49], "node_nam": 43, "node_name_scop": 43, "node_typ": 43, "nodenamefilt": 43, "nodenamescopefilt": 43, "nodetypefilt": 43, "nois": 9, "non": [5, 15, 17, 45], "none": [1, 2, 4, 5, 8, 11, 12, 15, 17, 21, 23, 24, 27, 29, 31, 34, 35, 39, 41, 43, 44, 45, 46], "norm": [9, 49], "norm_scor": [4, 5], "normal": [1, 4, 5, 14, 16], "note": [21, 24, 26, 27], "notebook": 50, "noteq": 42, "notic": [20, 25, 28, 32, 41], "now": [6, 18, 19, 34, 41, 45, 46, 47, 49], "np": [7, 11, 21, 22, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34, 41], "num_calibration_batch": [21, 24, 27, 29, 31, 34], "num_interest_points_factor": 5, "num_of_imag": [5, 11, 21, 24], "num_score_approxim": [6, 25, 32], "number": [1, 4, 5, 6, 11, 12, 14, 15, 16, 17, 20, 21, 24, 25, 27, 28, 29, 31, 32, 34, 45, 46, 48], "numel": 32, "numer": 5, "numpi": [21, 22, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34, 41], "o": 50, "object": [0, 3, 4, 5, 6, 10, 12, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24, 26, 27, 29, 30, 31, 34, 41, 43, 45, 46, 48], "observ": [21, 29, 31, 45, 49], "one": [5, 42, 49], "onli": [3, 4, 5, 6, 12, 21, 24, 26, 27, 41, 45], "onlin": [27, 34], "onnx": 11, "onnx_file_path": 41, "onnx_opset_vers": 41, 
"onnxruntim": 41, "op": [42, 45], "open": [41, 49, 50], "oper": [3, 10, 40, 42, 45], "operator_group": 45, "operator_set": 45, "operators_set": 45, "operatorsetnam": 45, "opquantizationconfig": [18, 19, 47], "optim": [1, 3, 4, 10, 11, 13, 14, 15, 16, 17, 18, 19, 21, 22, 24, 27, 29, 30, 31, 34, 39, 45, 46, 47, 50], "optimizer_bia": 4, "optimizer_quantization_paramet": 4, "optimizer_rest": [4, 15, 17], "optimizerv2": 15, "option": [11, 13, 21, 23, 24, 25, 27, 29, 31, 32, 34, 41, 45], "order": [15, 17, 21, 24, 27, 34, 40, 41, 42, 44], "org": 46, "orient": [13, 46], "origin": [25, 35, 36, 37, 38, 49], "ort": 41, "other": [1, 11, 15, 17, 48], "otherwis": 45, "our": [21, 24, 26, 27, 34, 50], "out": [3, 6], "out1": 50, "out2": 50, "out3": 50, "out_channel_axis_map": 3, "outlier": [12, 48], "output": [1, 3, 12, 14, 16, 20, 21, 24, 27, 28, 29, 31, 33, 34, 45, 48, 49, 50], "output_image_s": [20, 28], "output_loss_multipli": [1, 14, 16], "output_loss_typ": [1, 14, 16], "output_nam": 41, "outputlosstyp": [14, 16], "over": 5, "overrid": [4, 44], "overwrit": 5, "p": 32, "packag": [41, 46, 50], "pad": 45, "page": 13, "pair": 49, "param": [17, 40, 43, 46], "param_item": 11, "paramet": [1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46], "pars": 45, "part": 41, "pass": [2, 3, 5, 15, 17, 21, 24, 25, 26, 27, 29, 31, 32, 33, 34, 43], "patch": 45, "path": [11, 13, 23, 35, 41, 48, 49], "pattern": 45, "pdf": 46, "per": [1, 3, 4, 21, 24, 27, 34, 45, 46, 49], "per_sampl": 4, "percentag": 5, "peretz": 50, "perform": [6, 10, 11, 20, 25, 28, 32], "phase": 49, "pinpoint": 40, "pip": [41, 50], "pipelin": [1, 11, 14, 16], "pixel": [1, 14, 16], "place": 45, "plan": 41, "platform": [11, 18, 19, 21, 24, 25, 26, 27, 30, 32, 45], "pleas": [24, 27, 34, 41, 44, 48, 50], "plot": [40, 49], "point": [4, 5, 15, 17, 21, 29, 31, 36, 37, 38, 45, 49], "posit": 45, "possibl": [9, 21, 24, 27, 
34, 45, 49], "post": [4, 11, 13, 25, 27, 32, 34, 50], "power": [21, 24, 27, 29, 31, 34, 45], "power_of_two": 45, "poweroftwo": 46, "pre": 5, "preced": [21, 24, 27, 29, 31, 34], "precis": [5, 10, 11, 12, 13, 21, 22, 24, 25, 26, 27, 29, 30, 31, 32, 34, 39, 45, 48, 50], "predefin": [5, 6], "predict": 41, "prepar": [11, 13, 27, 34], "preprint": 50, "present": [2, 48, 49], "preserv": 45, "pretrain": [33, 34], "prevent": 5, "prior": 5, "prioriti": 11, "problemat": 40, "procedur": 48, "process": [4, 5, 8, 13, 14, 15, 16, 17, 18, 19, 20, 25, 28, 32, 39, 40, 43, 44, 45, 47, 49], "product": 49, "project": [41, 50], "properti": 7, "propos": [46, 48], "provid": [2, 11, 20, 25, 28, 32, 41, 45, 46, 48, 49], "prune": [10, 50], "pruned_model": [25, 32], "pruning_config": [25, 32], "pruning_info": [25, 32], "pruning_mask": 7, "pruning_num_score_approxim": 6, "pruningconfig": [6, 13, 25, 32], "pruninginfo": [7, 13, 25, 32], "ptq": [11, 24, 31, 41, 48], "purpos": [20, 28, 40], "py": 50, "pydantic_cor": 45, "pypi": 50, "python": [35, 50], "pytorch": [11, 13, 45, 46, 50], "pytorch_data_generation_experiment": [13, 28], "pytorch_default_tpc": 30, "pytorch_gradient_post_training_quant": [13, 17, 29], "pytorch_post_training_quant": [13, 31, 41, 48], "pytorch_pruning_experiment": [13, 32], "pytorch_quantization_aware_training_finalize_experiment": [13, 33], "pytorch_quantization_aware_training_init_experiment": [13, 33, 34], "pytorch_resource_utilization_data": [13, 30], "q": 41, "q_fraction_scheduler_polici": 4, "qat": [26, 27, 33, 34, 44], "qat_config": [13, 27, 34], "qatconfig": [27, 34], "qc": 8, "qc_option": 45, "qmodel": 11, "qnnpack": 45, "quant": 41, "quantifi": [7, 49], "quantiz": [0, 3, 4, 5, 8, 9, 11, 12, 13, 15, 17, 20, 22, 28, 30, 36, 37, 38, 39, 40, 43, 44, 45, 46, 49, 50], "quantization_config": [39, 46], "quantization_configur": 45, "quantization_format": 41, "quantization_info": [21, 24, 26, 27, 29, 31, 33, 34], "quantization_preserv": [18, 19, 45, 47], 
"quantizationconfig": [13, 39], "quantizationerrormethod": [8, 11, 13], "quantizationmethod": [3, 46], "quantize_and_export": 11, "quantize_reported_dir": [12, 48], "quantized_exportable_model": 41, "quantized_info": 48, "quantized_model": [11, 21, 24, 26, 27, 33, 34, 36, 37, 38, 48], "quantized_modul": [29, 31], "quantizewrapp": [13, 27, 33, 34], "question": 41, "r": 50, "radam": 16, "rais": 45, "random": [21, 22, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34, 41], "random_data_gen": 48, "rang": [3, 12, 21, 24, 27, 29, 31, 34, 48], "rate": [1, 14, 15, 16, 17], "ratio": [11, 12, 48], "readi": 33, "readm": 41, "receiv": 11, "recent": 48, "recommend": 48, "recov": [25, 32], "red": 48, "reduc": [5, 25, 32], "reduce_on_plateau": [1, 14], "reduce_on_plateau_with_reset": 16, "reduceonplateau": 1, "refer": [41, 48], "refine_mp_solut": 5, "regard": 42, "regular": [1, 4, 15, 17], "regularization_factor": [4, 15, 17], "regularized_min_max_diff": [1, 14], "relat": [3, 7, 13, 45], "releas": 50, "relev": 41, "relu": 3, "relu_bound_to_power_of_2": 8, "remov": [12, 25, 32, 33, 48], "replac": [26, 48], "report": [12, 13, 48], "report_dir": [12, 48], "repositori": 41, "repr_datagen": [21, 22, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34], "repr_dataset": [36, 37, 38, 41], "repres": [4, 5, 10, 11, 15, 17, 21, 24, 25, 26, 27, 29, 31, 32, 33, 34, 36, 37, 38, 41, 43, 45, 48, 49], "representative_data_gen": [21, 22, 24, 25, 27, 29, 30, 31, 32, 34, 41, 48], "representative_dataset": 11, "request": 2, "requir": [21, 24, 27, 29, 31, 34, 46, 49], "research": 50, "reshap": [3, 20], "residu": 11, "residual_collaps": [8, 11], "resnet50": [25, 32, 41], "resnet50_weight": 32, "resourc": [6, 10, 11, 13, 21, 24, 25, 26, 27, 32, 33, 34, 49], "resourceutil": [13, 21, 22, 24, 25, 26, 27, 29, 30, 31, 32, 34], "respect": 48, "respectivli": 3, "rest": 4, "result": 48, "retrain": [25, 32], "retriev": [18, 19, 45], "return": [2, 4, 5, 7, 11, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 
33, 34, 35, 36, 37, 38, 40, 41], "round": 4, "rounding_typ": 4, "ru": [21, 24, 26, 27], "ru_data": [22, 30], "rule": [40, 43], "run": [4, 15, 17, 41, 49], "runner": 40, "same": [1, 41, 45], "sampl": [4, 15, 17, 49], "save": [3, 11, 12, 27, 35, 41, 46, 48], "save_model_path": [11, 41], "saved_model": 23, "savedmodel": 23, "scalar": 49, "scale": [4, 5, 45], "scale_log_norm": 4, "schedul": [1, 4, 14, 16, 40], "scheduler_typ": [1, 14, 16], "schedulertyp": [14, 16], "schema": 45, "schema_vers": 45, "score": [4, 5, 6, 7, 9, 11, 15, 17, 25, 32], "sdsp": [11, 13, 45], "sdsp_v3_14": 19, "sdsp_version": [11, 19], "search": [5, 10, 13, 21, 24, 27, 29, 31, 34], "second": 49, "see": [4, 17, 48, 50], "seen": 49, "select": [0, 3, 6, 8, 9, 11, 13, 39, 41, 44, 45, 46], "self": 45, "semiconductor": 50, "sensit": [5, 6, 25, 32], "sequenti": [20, 28], "serial": 13, "serialization_format": 41, "sess": 41, "session": 41, "set": [3, 11, 12, 13, 15, 17, 20, 21, 24, 25, 26, 27, 28, 29, 31, 32, 34, 35, 36, 37, 38, 41, 43, 45, 46, 48, 49], "set_log_fold": [35, 48, 49], "setup": [11, 50], "sever": [21, 24, 27, 29, 31, 34, 49], "shift": 48, "shift_negative_activation_correct": 8, "shift_negative_params_search": 8, "shift_negative_ratio": 8, "shift_negative_threshold_recalcul": 8, "shortli": 45, "should": [3, 6, 15, 21, 22, 24, 25, 26, 27, 29, 31, 32, 34, 41, 45, 49], "show": 49, "shown": 48, "sigma": 5, "signal": 9, "signed": 45, "signific": [7, 48], "significantli": 48, "simd": [25, 32, 45], "simd_siz": 45, "similar": [9, 12, 36, 37, 38, 40, 48, 50], "similarli": 45, "simpl": [20, 28], "simplic": [20, 28], "simul": 40, "simulate_schedul": 40, "simultan": 45, "singl": 45, "six": 48, "size": [1, 4, 5, 14, 15, 16, 17, 20, 21, 24, 26, 27, 28, 34, 41, 46], "skip": [12, 40, 41, 48], "slowli": 41, "small": 48, "smaller": 42, "smallereq": 42, "smooth": [1, 46], "smoothing_and_augment": [1, 14, 16], "so": [11, 41], "softmax": 3, "softmax_shift": 8, "softquant": 4, "solut": 50, "solver": [21, 24, 27, 
34], "some": [18, 19, 20, 28, 41, 45, 47, 49], "soni": 50, "sonysemiconductorsolut": 50, "sourc": 50, "specif": [0, 3, 11, 13, 25, 32, 43, 48, 49], "specifi": [6, 11, 12, 14, 16, 18, 20, 23, 25, 28, 32, 41, 45, 48], "sphinx": 13, "sqnr": [12, 48], "squar": [1, 9], "stabl": 50, "stage": 49, "standard": [25, 32, 46], "start": [20, 28, 41, 46, 50], "start_step": 4, "state": 50, "state_dict": 32, "statist": [3, 21, 24, 27, 29, 31, 34, 49], "ste": [4, 44, 46], "step": [1, 4, 46, 48], "store": [7, 46], "str": [3, 11, 12, 18, 19, 21, 22, 24, 25, 27, 29, 30, 31, 32, 34, 35, 36, 37, 38, 41, 42, 45, 48], "straight": [4, 46], "strategi": [6, 25, 32], "string": 43, "structur": [13, 50], "student": 4, "success": 11, "suffer": 41, "suggest": 48, "sum": [10, 22, 25, 30, 32], "support": [4, 11, 41], "supported_input_activation_n_bit": 45, "symmetr": [21, 24, 27, 29, 31, 34, 45, 46], "t": [35, 50], "tab": 49, "tabl": 45, "tag": 49, "take": [5, 24, 27, 34, 50], "target": [4, 11, 13, 18, 19, 21, 22, 24, 25, 26, 27, 30, 32, 33, 34, 45], "target_platform_cap": [21, 22, 24, 25, 27, 29, 30, 31, 32, 34, 42, 46], "target_q_fract": 4, "target_resource_util": [21, 24, 25, 27, 29, 31, 32, 34], "targetplatformcap": [13, 21, 22, 24, 25, 27, 29, 30, 31, 32, 34], "teacher": 4, "tempfil": 41, "tensor": [5, 11, 12, 15, 17, 20, 22, 28, 30, 45, 46, 49, 50], "tensorboard": [40, 50], "tensorflow": [3, 11, 13, 15, 20, 21, 22, 24, 25, 26, 27, 41, 43, 45, 50], "tf": [3, 11, 15, 20, 23, 26, 27], "tflite": [41, 45], "than": [5, 42, 48], "thei": 3, "them": [45, 49], "thi": [5, 7, 8, 9, 11, 13, 20, 21, 23, 24, 25, 26, 27, 28, 29, 31, 32, 34, 35, 41, 45, 46, 48, 50], "those": 48, "three": [3, 48], "threshold": [5, 8, 9, 11, 12, 21, 24, 27, 29, 31, 34, 45, 46, 48], "threshold_bitwidth_mixed_precis": 48, "threshold_bitwidth_mixed_precision_with_model_output_loss_object": 12, "threshold_degrade_layer_ratio": [12, 48], "threshold_quantize_error": [12, 48], "threshold_ratio_unbalanced_concaten": [12, 48], 
"threshold_zscore_outlier_remov": [12, 48], "through": [4, 20, 25, 28, 46], "throughout": 4, "thu": [25, 32, 49], "time": [3, 6, 46], "togeth": [25, 32], "tool": [11, 13, 46, 50], "toolkit": [11, 13, 20, 28, 29, 48], "torch": [17, 28, 37, 38, 41, 50], "torchscript": 41, "torchvis": [1, 16, 29, 30, 31, 32, 33, 34, 41], "total": [10, 22, 30], "total_memori": 10, "tpc": [11, 13, 25, 32, 45], "tpc_minor_vers": 45, "tpc_patch_vers": 45, "tpc_platform_typ": 45, "tpc_v1_0": 18, "tpc_version": 18, "trace": 41, "train": [4, 11, 13, 44, 46, 50], "train_bia": 4, "trainabl": [23, 26, 46], "trainable_infrastructur": 44, "trainablequant": 26, "transform": [1, 21, 24, 27, 29, 31, 34], "transpos": 3, "treat": 45, "troubleshoot": 13, "true": [1, 5, 8, 11, 12, 15, 16, 17, 23, 33, 34, 46], "try": 5, "tun": 34, "tune": [15, 17, 25, 26, 27, 32, 33], "tupl": [1, 3, 11, 14, 16, 20, 21, 24, 25, 28, 29, 31, 32, 43, 45], "tutori": 48, "two": [5, 12, 21, 24, 27, 29, 31, 34, 41, 45, 48, 49], "type": [0, 1, 2, 4, 5, 6, 7, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 24, 25, 26, 28, 29, 30, 31, 32, 35, 36, 37, 38, 41, 43, 45, 48], "ui": 49, "unbalanc": [12, 48], "unchang": 40, "under": 49, "unifi": 11, "uniform": [45, 46], "union": [1, 14, 16, 20, 21, 22, 24, 25, 27, 28, 29, 30, 31, 32, 34, 45], "uniqu": 45, "up": [6, 20, 28, 35, 45, 49], "updat": [4, 11], "upon": 46, "us": [0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 42, 43, 44, 45, 46, 47, 48, 49, 50], "use_hessian_based_scor": [5, 11], "use_hessian_based_weight": [15, 17], "use_hessian_sample_attent": [15, 17], "use_mixed_precis": 11, "user": [11, 13, 21, 24, 26, 27, 29, 31, 33, 34, 48], "userinform": [21, 24, 29, 31], "util": [6, 11, 13, 21, 24, 25, 26, 27, 32, 33, 34, 46], "v": 50, "valid": [36, 37, 38, 45, 46, 48], "validation_dataset": [36, 37, 38, 48], "validationerror": 45, "valu": [1, 2, 3, 4, 5, 6, 9, 11, 12, 21, 24, 25, 26, 27, 32, 
41, 42, 43, 45, 46, 48], "valuabl": 9, "variabl": [11, 15, 17], "variou": [11, 20, 28, 49], "vector": [4, 49], "verbos": 35, "version": [11, 13, 20, 28, 45], "via": [41, 50], "view": 49, "visit": [44, 50], "visual": [48, 50], "wa": [2, 41, 48], "wai": [49, 50], "walk": [20, 28], "want": 3, "warn": [11, 48], "we": [3, 20, 21, 24, 25, 27, 28, 32, 34, 41, 43, 45, 46, 49], "weight": [0, 1, 3, 4, 5, 8, 10, 11, 14, 15, 16, 17, 21, 22, 25, 27, 29, 30, 31, 32, 33, 34, 41, 43, 44, 45, 46, 49], "weight_quantizer_params_overrid": 44, "weight_training_method": 44, "weights_bias_correct": [8, 11], "weights_channels_axi": 46, "weights_compression_ratio": 11, "weights_error_method": 8, "weights_memori": [6, 10, 21, 24, 25, 27, 32, 34], "weights_n_bit": [43, 45, 46], "weights_per_channel_threshold": [45, 46], "weights_quantization_candid": 46, "weights_quantization_method": [43, 45, 46], "weights_quantization_param": 46, "weights_quantization_params_fn": 43, "weights_second_moment_correct": 8, "were": 49, "when": [1, 2, 3, 4, 5, 6, 9, 10, 12, 13, 15, 17, 21, 24, 26, 27, 40, 41, 42, 44, 45, 46, 48, 49], "where": [7, 12, 41, 43, 48, 49], "whether": [4, 5, 7, 11, 14, 15, 16, 17, 23, 40, 41, 45, 46], "which": [4, 6, 40, 41, 42, 43, 45, 46], "while": [8, 21, 24, 26, 27, 34, 45], "who": 48, "width": [0, 5, 12, 13, 21, 24, 27, 28, 34, 39, 45, 48, 50], "within": [40, 45, 48, 50], "without": 13, "work": 50, "would": 49, "wrap": [2, 3, 23, 27, 34, 42, 45, 46], "wrapper": [27, 33, 34, 46], "writer": 49, "x": 48, "xquant": [11, 50], "xquant_config": [12, 36, 37, 38, 48], "xquant_report_keras_experiment": [13, 36], "xquant_report_pytorch_experiment": [13, 37, 48], "xquant_report_troubleshoot_pytorch_experiment": [12, 13, 38, 48], "xquantconfig": [12, 13, 36, 37, 38], "y": 48, "yield": [21, 22, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34, 41], "you": [8, 11, 41, 45, 49, 50], "your": [41, 48], "z": 11, "z_score": [12, 48], "z_threshold": [8, 11], "zero": [5, 45]}, "titles": ["BitWidthConfig", "Data 
Generation Configuration", "DefaultDict Class", "FrameworkInfo Class", "GradientPTQConfig Class", "MixedPrecisionQuantizationConfig", "Pruning Configuration", "Pruning Information", "QuantizationConfig", "QuantizationErrorMethod", "ResourceUtilization", "wrapper", "XQuant Configuration", "API Docs", "Get DataGenerationConfig for Keras Models", "Get GradientPTQConfig for Keras Models", "Get DataGenerationConfig for Pytorch Models", "Get GradientPTQConfig for Pytorch Models", "Get TargetPlatformCapabilities for tpc version", "Get TargetPlatformCapabilities for sdsp converter version", "Keras Data Generation", "Keras Gradient Based Post Training Quantization", "Get Resource Utilization information for Keras Models", "Load Quantized Keras Model", "Keras Post Training Quantization", "Keras Structured Pruning", "Keras Quantization Aware Training Model Finalize", "Keras Quantization Aware Training Model Init", "Pytorch Data Generation", "Pytorch Gradient Based Post Training Quantization", "Get Resource Utilization information for PyTorch Models", "Pytorch Post Training Quantization", "Pytorch Structured Pruning", "PyTorch Quantization Aware Training Model Finalize", "PyTorch Quantization Aware Training Model Init", "Enable a Logger", "XQuant Report Keras", "XQuant Report Pytorch", "XQuant Report Troubleshoot Pytorch", "CoreConfig", "debug_config Module", "exporter Module", "Layer Attributes Filters", "network_editor Module", "qat_config Module", "target_platform_capabilities Module", "trainable_infrastructure Module", "<no title>", "XQuant Extension Tool", "Visualization within TensorBoard", "Model Compression Toolkit User Guide"], "titleterms": {"about": 48, "action": 43, "api": [13, 50], "attribut": 42, "attributequantizationconfig": 45, "awar": [26, 27, 33, 34], "base": [21, 29], "basekerastrainablequant": 46, "basepytorchtrainablequant": 46, "batchnormalignemntlosstyp": 1, "bit": 49, "bitwidthconfig": 0, "bnlayerweightingtyp": 1, "channelaxi": 3, 
"channelsfilteringstrategi": 6, "class": [2, 3, 4], "comparison": 49, "compress": 50, "configur": [1, 6, 12, 49], "constraint": 50, "convert": 19, "core": 13, "coreconfig": 39, "cosin": 49, "data": [1, 20, 28], "data_gener": 13, "datagenerationconfig": [14, 16], "datainittyp": 1, "debug_config": 40, "debugconfig": 40, "defaultdict": 2, "doc": 13, "document": 50, "editrul": 43, "enabl": 35, "error": 48, "exampl": 48, "export": [13, 41], "extens": 48, "featur": 50, "filter": [42, 43], "final": [26, 33], "flow": 48, "format": [41, 48], "frameworkinfo": 3, "fuse": 45, "gener": [1, 20, 28, 48], "get": [14, 15, 16, 17, 18, 19, 22, 30], "gptq": 13, "gptqhessianscoresconfig": 4, "gradient": [21, 29], "gradientptqconfig": [4, 15, 17], "gradualactivationquantizationconfig": 4, "graph": 48, "guid": 50, "how": 48, "imagegranular": 1, "imagenormalizationtyp": 1, "imagepipelinetyp": 1, "importancemetr": 6, "indic": 13, "infer": 41, "inform": [7, 22, 30], "init": [27, 34], "instal": 50, "judgeabl": 48, "kera": [14, 15, 20, 21, 22, 23, 24, 25, 26, 27, 36, 41], "keras_export_model": 41, "keras_load_quantized_model": 13, "kerasexportserializationformat": 41, "layer": 42, "load": 23, "logger": 35, "manualbitwidthselect": 0, "mctq": 41, "mix": 49, "mixedprecisionquantizationconfig": 5, "model": [14, 15, 16, 17, 22, 23, 26, 27, 30, 33, 34, 41, 50], "modul": [40, 41, 43, 44, 45, 46], "mpdistanceweight": 5, "mpmetricnorm": 5, "name": 41, "network_editor": 43, "onnx": 41, "operatorsetgroup": 45, "operatorsset": 45, "opquantizationconfig": 45, "opset": 41, "output": 41, "outputlosstyp": 1, "overal": 48, "overview": 50, "paramet": 48, "post": [21, 24, 29, 31], "precis": 49, "process": 48, "prune": [6, 7, 13, 25, 32], "ptq": 13, "pytorch": [16, 17, 28, 29, 30, 31, 32, 33, 34, 37, 38, 41], "pytorch_export_model": 41, "pytorchexportserializationformat": 41, "qat": 13, "qat_config": 44, "qatconfig": 44, "qfractionlinearannealingconfig": 4, "quantiz": [21, 23, 24, 26, 27, 29, 31, 33, 34, 41, 
48], "quantizationconfig": 8, "quantizationconfigopt": 45, "quantizationerrormethod": 9, "quantizationformat": 41, "quantizationmethod": 45, "quickstart": 50, "refer": 50, "report": [36, 37, 38], "resourc": [22, 30], "resourceutil": 10, "roundingtyp": 4, "run": 48, "schedulertyp": 1, "sdsp": 19, "serial": 41, "set_log_fold": 13, "similar": 49, "structur": [25, 32], "support": 50, "tabl": 13, "target_platform_cap": [13, 45], "targetplatformcap": [18, 19, 45], "technic": 50, "tensorboard": 49, "tool": 48, "toolkit": 50, "tpc": 18, "train": [21, 24, 26, 27, 29, 31, 33, 34], "trainable_infrastructur": [13, 46], "trainablequantizeractivationconfig": 46, "trainablequantizerweightsconfig": 46, "trainingmethod": [44, 46], "troubleshoot": [38, 48], "tutori": 41, "understand": 48, "us": 41, "user": 50, "util": [22, 30], "version": [18, 19, 41], "visual": 49, "width": 49, "within": 49, "wrapper": [11, 13], "xquant": [12, 13, 36, 37, 38, 48], "xquantconfig": 48}}) \ No newline at end of file +Search.setIndex({"alltitles": {"API Docs": [[13, null]], "API Documentation": [[50, "api-documentation"]], "About XQuant Extension Tool": [[48, "about-xquant-extension-tool"]], "Actions": [[43, "actions"]], "Attribute Filters": [[42, "attribute-filters"]], "AttributeQuantizationConfig": [[45, "attributequantizationconfig"]], "BNLayerWeightingType": [[1, "bnlayerweightingtype"]], "BaseKerasTrainableQuantizer": [[46, "basekerastrainablequantizer"]], "BasePytorchTrainableQuantizer": [[46, "basepytorchtrainablequantizer"]], "BatchNormAlignemntLossType": [[1, "batchnormalignemntlosstype"]], "BitWidthConfig": [[0, null]], "ChannelAxis": [[3, "channelaxis"]], "ChannelsFilteringStrategy": [[6, "channelsfilteringstrategy"]], "CoreConfig": [[39, null]], "Cosine Similarity Comparison": [[49, "cosine-similarity-comparison"]], "Data Generation Configuration": [[1, null]], "DataInitType": [[1, "datainittype"]], "DebugConfig": [[40, "debugconfig"]], "DefaultDict Class": [[2, null]], "EditRule": [[43, 
"editrule"]], "Enable a Logger": [[35, null]], "Filters": [[43, "filters"]], "FrameworkInfo Class": [[3, null]], "Fusing": [[45, "fusing"]], "GPTQHessianScoresConfig Class": [[4, "gptqhessianscoresconfig-class"]], "Get DataGenerationConfig for Keras Models": [[14, null]], "Get DataGenerationConfig for Pytorch Models": [[16, null]], "Get GradientPTQConfig for Keras Models": [[15, null]], "Get GradientPTQConfig for Pytorch Models": [[17, null]], "Get Resource Utilization information for Keras Models": [[22, null]], "Get Resource Utilization information for PyTorch Models": [[30, null]], "Get TargetPlatformCapabilities for sdsp converter version": [[19, null]], "Get TargetPlatformCapabilities for tpc version": [[18, null]], "GradientPTQConfig Class": [[4, null]], "GradualActivationQuantizationConfig": [[4, "gradualactivationquantizationconfig"]], "How to Run": [[48, "how-to-run"]], "ImageGranularity": [[1, "imagegranularity"]], "ImageNormalizationType": [[1, "imagenormalizationtype"]], "ImagePipelineType": [[1, "imagepipelinetype"]], "ImportanceMetric": [[6, "importancemetric"]], "Indices and tables": [[13, "indices-and-tables"]], "Install": [[50, "install"]], "Keras Data Generation": [[20, null]], "Keras Gradient Based Post Training Quantization": [[21, null]], "Keras Post Training Quantization": [[24, null]], "Keras Quantization Aware Training Model Finalize": [[26, null]], "Keras Quantization Aware Training Model Init": [[27, null]], "Keras Structured Pruning": [[25, null]], "Keras Tutorial": [[41, "keras-tutorial"]], "KerasExportSerializationFormat": [[41, "kerasexportserializationformat"]], "Keys in the processing state dictionary": [[40, "id1"]], "Layer Attributes Filters": [[42, null]], "Load Quantized Keras Model": [[23, null]], "MCTQ": [[41, "mctq"]], "MCTQ Quantization Format": [[41, "mctq-quantization-format"]], "ManualBitWidthSelection": [[0, "manualbitwidthselection"]], "Mixed-precision Configuration Bit-width": [[49, 
"mixed-precision-configuration-bit-width"]], "MixedPrecisionQuantizationConfig": [[5, null]], "Model Compression Toolkit User Guide": [[50, null]], "MpDistanceWeighting": [[5, "mpdistanceweighting"]], "MpMetricNormalization": [[5, "mpmetricnormalization"]], "ONNX": [[41, "onnx"]], "ONNX model output names": [[41, "onnx-model-output-names"]], "ONNX opset version": [[41, "onnx-opset-version"]], "OpQuantizationConfig": [[45, "opquantizationconfig"]], "OperatorSetGroup": [[45, "operatorsetgroup"]], "OperatorsSet": [[45, "operatorsset"]], "OutputLossType": [[1, "outputlosstype"]], "Overall Process Flow": [[48, "overall-process-flow"]], "Overview": [[50, "overview"]], "Pruning Configuration": [[6, null]], "Pruning Information": [[7, null]], "PyTorch Quantization Aware Training Model Finalize": [[33, null]], "PyTorch Quantization Aware Training Model Init": [[34, null]], "Pytorch Data Generation": [[28, null]], "Pytorch Gradient Based Post Training Quantization": [[29, null]], "Pytorch Post Training Quantization": [[31, null]], "Pytorch Structured Pruning": [[32, null]], "Pytorch Tutorial": [[41, "pytorch-tutorial"]], "PytorchExportSerializationFormat": [[41, "pytorchexportserializationformat"]], "QATConfig": [[44, "qatconfig"]], "QFractionLinearAnnealingConfig": [[4, "qfractionlinearannealingconfig"]], "QuantizationConfig": [[8, null]], "QuantizationConfigOptions": [[45, "quantizationconfigoptions"]], "QuantizationErrorMethod": [[9, null]], "QuantizationFormat": [[41, "quantizationformat"]], "QuantizationMethod": [[45, "quantizationmethod"]], "Quickstart": [[50, "quickstart"]], "References": [[50, "references"]], "ResourceUtilization": [[10, null]], "RoundingType": [[4, "roundingtype"]], "SchedulerType": [[1, "schedulertype"]], "Supported Features": [[50, "supported-features"]], "TargetPlatformCapabilities": [[45, "targetplatformcapabilities"]], "Technical Constraints": [[50, "technical-constraints"]], "TrainableQuantizerActivationConfig": [[46, 
"trainablequantizeractivationconfig"]], "TrainableQuantizerWeightsConfig": [[46, "trainablequantizerweightsconfig"]], "TrainingMethod": [[44, "trainingmethod"], [46, "trainingmethod"]], "Understanding the General Troubleshoots": [[48, "understanding-the-general-troubleshoots"]], "Understanding the Judgeable Troubleshoots": [[48, "understanding-the-judgeable-troubleshoots"]], "Understanding the Quantization Error Graph": [[48, "understanding-the-quantization-error-graph"]], "Use exported model for inference": [[41, "use-exported-model-for-inference"]], "Visualization within TensorBoard": [[49, null]], "XQuant Configuration": [[12, null]], "XQuant Extension Tool": [[48, null]], "XQuant Report Keras": [[36, null]], "XQuant Report Pytorch": [[37, null]], "XQuant Report Troubleshoot Pytorch": [[38, null]], "XQuantConfig Format and Examples": [[48, "xquantconfig-format-and-examples"]], "XQuantConfig parameter": [[48, "id3"]], "core": [[13, "core"]], "data_generation": [[13, "data-generation"]], "debug_config Module": [[40, null]], "exporter": [[13, "exporter"]], "exporter Module": [[41, null]], "gptq": [[13, "gptq"]], "keras serialization format": [[41, "keras-serialization-format"]], "keras_export_model": [[41, "keras-export-model"]], "keras_load_quantized_model": [[13, "keras-load-quantized-model"]], "network_editor Module": [[43, null]], "pruning": [[13, "pruning"]], "ptq": [[13, "ptq"]], "pytorch_export_model": [[41, "pytorch-export-model"]], "qat": [[13, "qat"]], "qat_config Module": [[44, null]], "set_log_folder": [[13, "set-log-folder"]], "target_platform_capabilities": [[13, "target-platform-capabilities"]], "target_platform_capabilities Module": [[45, null]], "trainable_infrastructure": [[13, "trainable-infrastructure"]], "trainable_infrastructure Module": [[46, null]], "wrapper": [[11, null], [13, "wrapper"]], "xquant": [[13, "xquant"]]}, "docnames": ["api/api_docs/classes/BitWidthConfig", "api/api_docs/classes/DataGenerationConfig", 
"api/api_docs/classes/DefaultDict", "api/api_docs/classes/FrameworkInfo", "api/api_docs/classes/GradientPTQConfig", "api/api_docs/classes/MixedPrecisionQuantizationConfig", "api/api_docs/classes/PruningConfig", "api/api_docs/classes/PruningInfo", "api/api_docs/classes/QuantizationConfig", "api/api_docs/classes/QuantizationErrorMethod", "api/api_docs/classes/ResourceUtilization", "api/api_docs/classes/Wrapper", "api/api_docs/classes/XQuantConfig", "api/api_docs/index", "api/api_docs/methods/get_keras_data_generation_config", "api/api_docs/methods/get_keras_gptq_config", "api/api_docs/methods/get_pytorch_data_generation_config", "api/api_docs/methods/get_pytroch_gptq_config", "api/api_docs/methods/get_target_platform_capabilities", "api/api_docs/methods/get_target_platform_capabilities_sdsp", "api/api_docs/methods/keras_data_generation_experimental", "api/api_docs/methods/keras_gradient_post_training_quantization", "api/api_docs/methods/keras_kpi_data", "api/api_docs/methods/keras_load_quantizad_model", "api/api_docs/methods/keras_post_training_quantization", "api/api_docs/methods/keras_pruning_experimental", "api/api_docs/methods/keras_quantization_aware_training_finalize_experimental", "api/api_docs/methods/keras_quantization_aware_training_init_experimental", "api/api_docs/methods/pytorch_data_generation_experimental", "api/api_docs/methods/pytorch_gradient_post_training_quantization", "api/api_docs/methods/pytorch_kpi_data", "api/api_docs/methods/pytorch_post_training_quantization", "api/api_docs/methods/pytorch_pruning_experimental", "api/api_docs/methods/pytorch_quantization_aware_training_finalize_experimental", "api/api_docs/methods/pytorch_quantization_aware_training_init_experimental", "api/api_docs/methods/set_logger_path", "api/api_docs/methods/xquant_report_keras_experimental", "api/api_docs/methods/xquant_report_pytorch_experimental", "api/api_docs/methods/xquant_report_troubleshoot_pytorch_experimental", "api/api_docs/modules/core_config", 
"api/api_docs/modules/debug_config", "api/api_docs/modules/exporter", "api/api_docs/modules/layer_filters", "api/api_docs/modules/network_editor", "api/api_docs/modules/qat_config", "api/api_docs/modules/target_platform_capabilities", "api/api_docs/modules/trainable_infrastructure", "api/api_docs/notes/tpc_note", "guidelines/XQuant_Extension_Tool", "guidelines/visualization", "index"], "envversion": {"sphinx": 64, "sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2}, "filenames": ["api/api_docs/classes/BitWidthConfig.rst", "api/api_docs/classes/DataGenerationConfig.rst", "api/api_docs/classes/DefaultDict.rst", "api/api_docs/classes/FrameworkInfo.rst", "api/api_docs/classes/GradientPTQConfig.rst", "api/api_docs/classes/MixedPrecisionQuantizationConfig.rst", "api/api_docs/classes/PruningConfig.rst", "api/api_docs/classes/PruningInfo.rst", "api/api_docs/classes/QuantizationConfig.rst", "api/api_docs/classes/QuantizationErrorMethod.rst", "api/api_docs/classes/ResourceUtilization.rst", "api/api_docs/classes/Wrapper.rst", "api/api_docs/classes/XQuantConfig.rst", "api/api_docs/index.rst", "api/api_docs/methods/get_keras_data_generation_config.rst", "api/api_docs/methods/get_keras_gptq_config.rst", "api/api_docs/methods/get_pytorch_data_generation_config.rst", "api/api_docs/methods/get_pytroch_gptq_config.rst", "api/api_docs/methods/get_target_platform_capabilities.rst", "api/api_docs/methods/get_target_platform_capabilities_sdsp.rst", "api/api_docs/methods/keras_data_generation_experimental.rst", "api/api_docs/methods/keras_gradient_post_training_quantization.rst", "api/api_docs/methods/keras_kpi_data.rst", "api/api_docs/methods/keras_load_quantizad_model.rst", "api/api_docs/methods/keras_post_training_quantization.rst", 
"api/api_docs/methods/keras_pruning_experimental.rst", "api/api_docs/methods/keras_quantization_aware_training_finalize_experimental.rst", "api/api_docs/methods/keras_quantization_aware_training_init_experimental.rst", "api/api_docs/methods/pytorch_data_generation_experimental.rst", "api/api_docs/methods/pytorch_gradient_post_training_quantization.rst", "api/api_docs/methods/pytorch_kpi_data.rst", "api/api_docs/methods/pytorch_post_training_quantization.rst", "api/api_docs/methods/pytorch_pruning_experimental.rst", "api/api_docs/methods/pytorch_quantization_aware_training_finalize_experimental.rst", "api/api_docs/methods/pytorch_quantization_aware_training_init_experimental.rst", "api/api_docs/methods/set_logger_path.rst", "api/api_docs/methods/xquant_report_keras_experimental.rst", "api/api_docs/methods/xquant_report_pytorch_experimental.rst", "api/api_docs/methods/xquant_report_troubleshoot_pytorch_experimental.rst", "api/api_docs/modules/core_config.rst", "api/api_docs/modules/debug_config.rst", "api/api_docs/modules/exporter.rst", "api/api_docs/modules/layer_filters.rst", "api/api_docs/modules/network_editor.rst", "api/api_docs/modules/qat_config.rst", "api/api_docs/modules/target_platform_capabilities.rst", "api/api_docs/modules/trainable_infrastructure.rst", "api/api_docs/notes/tpc_note.rst", "guidelines/XQuant_Extension_Tool.rst", "guidelines/visualization.rst", "index.rst"], "indexentries": {"add_metadata (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.targetplatformcapabilities attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.TargetPlatformCapabilities.add_metadata", false]], "attributefilter (class in model_compression_toolkit.target_platform_capabilities)": [[42, "model_compression_toolkit.target_platform_capabilities.AttributeFilter", false]], "attributequantizationconfig (class in model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema)": 
[[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.AttributeQuantizationConfig", false]], "base_config (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.quantizationconfigoptions attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.QuantizationConfigOptions.base_config", false]], "basekerastrainablequantizer (class in model_compression_toolkit.trainable_infrastructure)": [[46, "model_compression_toolkit.trainable_infrastructure.BaseKerasTrainableQuantizer", false]], "basepytorchtrainablequantizer (class in model_compression_toolkit.trainable_infrastructure)": [[46, "model_compression_toolkit.trainable_infrastructure.BasePytorchTrainableQuantizer", false]], "batchnormalignemntlosstype (class in model_compression_toolkit.data_generation)": [[1, "model_compression_toolkit.data_generation.BatchNormAlignemntLossType", false]], "bit_width (model_compression_toolkit.core.common.quantization.bit_width_config.manualbitwidthselection attribute)": [[0, "model_compression_toolkit.core.common.quantization.bit_width_config.ManualBitWidthSelection.bit_width", false]], "bitwidthconfig (class in model_compression_toolkit.core)": [[0, "model_compression_toolkit.core.BitWidthConfig", false]], "bnlayerweightingtype (class in model_compression_toolkit.data_generation)": [[1, "model_compression_toolkit.data_generation.BNLayerWeightingType", false]], "changecandidatesactivationquantconfigattr (class in model_compression_toolkit.core.network_editor)": [[43, "model_compression_toolkit.core.network_editor.ChangeCandidatesActivationQuantConfigAttr", false]], "changecandidatesactivationquantizationmethod (class in model_compression_toolkit.core.network_editor)": [[43, "model_compression_toolkit.core.network_editor.ChangeCandidatesActivationQuantizationMethod", false]], "changecandidatesweightsquantconfigattr (class in model_compression_toolkit.core.network_editor)": [[43, 
"model_compression_toolkit.core.network_editor.ChangeCandidatesWeightsQuantConfigAttr", false]], "changecandidatesweightsquantizationmethod (class in model_compression_toolkit.core.network_editor)": [[43, "model_compression_toolkit.core.network_editor.ChangeCandidatesWeightsQuantizationMethod", false]], "changefinalactivationquantconfigattr (class in model_compression_toolkit.core.network_editor)": [[43, "model_compression_toolkit.core.network_editor.ChangeFinalActivationQuantConfigAttr", false]], "changefinalweightsquantconfigattr (class in model_compression_toolkit.core.network_editor)": [[43, "model_compression_toolkit.core.network_editor.ChangeFinalWeightsQuantConfigAttr", false]], "changefinalweightsquantizationmethod (class in model_compression_toolkit.core.network_editor)": [[43, "model_compression_toolkit.core.network_editor.ChangeFinalWeightsQuantizationMethod", false]], "changequantizationparamfunction (class in model_compression_toolkit.core.network_editor)": [[43, "model_compression_toolkit.core.network_editor.ChangeQuantizationParamFunction", false]], "channelaxis (class in model_compression_toolkit.core)": [[3, "model_compression_toolkit.core.ChannelAxis", false]], "channels_filtering_strategy (model_compression_toolkit.pruning.pruningconfig attribute)": [[6, "model_compression_toolkit.pruning.PruningConfig.channels_filtering_strategy", false]], "channelsfilteringstrategy (class in model_compression_toolkit.pruning)": [[6, "model_compression_toolkit.pruning.ChannelsFilteringStrategy", false]], "coreconfig (class in model_compression_toolkit.core)": [[39, "model_compression_toolkit.core.CoreConfig", false]], "datagenerationconfig (class in model_compression_toolkit.data_generation)": [[1, "model_compression_toolkit.data_generation.DataGenerationConfig", false]], "datainittype (class in model_compression_toolkit.data_generation)": [[1, "model_compression_toolkit.data_generation.DataInitType", false]], "debugconfig (class in 
model_compression_toolkit.core)": [[40, "model_compression_toolkit.core.DebugConfig", false]], "default_qco (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.targetplatformcapabilities attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.TargetPlatformCapabilities.default_qco", false]], "defaultdict (class in model_compression_toolkit)": [[2, "model_compression_toolkit.DefaultDict", false]], "editrule (class in model_compression_toolkit.core.network_editor)": [[43, "model_compression_toolkit.core.network_editor.EditRule", false]], "enable_weights_quantization (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.attributequantizationconfig attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.AttributeQuantizationConfig.enable_weights_quantization", false]], "eq (class in model_compression_toolkit.target_platform_capabilities)": [[42, "model_compression_toolkit.target_platform_capabilities.Eq", false]], "filter (model_compression_toolkit.core.common.quantization.bit_width_config.manualbitwidthselection attribute)": [[0, "model_compression_toolkit.core.common.quantization.bit_width_config.ManualBitWidthSelection.filter", false]], "frameworkinfo (class in model_compression_toolkit.core)": [[3, "model_compression_toolkit.core.FrameworkInfo", false]], "fuse_op_quantization_config (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.fusing attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.Fusing.fuse_op_quantization_config", false]], "fusing (class in model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.Fusing", false]], "fusing_patterns 
(model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.targetplatformcapabilities attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.TargetPlatformCapabilities.fusing_patterns", false]], "get() (model_compression_toolkit.defaultdict method)": [[2, "model_compression_toolkit.DefaultDict.get", false]], "get_keras_data_generation_config() (in module model_compression_toolkit.data_generation)": [[14, "model_compression_toolkit.data_generation.get_keras_data_generation_config", false]], "get_keras_gptq_config() (in module model_compression_toolkit.gptq)": [[15, "model_compression_toolkit.gptq.get_keras_gptq_config", false]], "get_pytorch_data_generation_config() (in module model_compression_toolkit.data_generation)": [[16, "model_compression_toolkit.data_generation.get_pytorch_data_generation_config", false]], "get_pytorch_gptq_config() (in module model_compression_toolkit.gptq)": [[17, "model_compression_toolkit.gptq.get_pytorch_gptq_config", false]], "get_target_platform_capabilities() (in module model_compression_toolkit)": [[18, "model_compression_toolkit.get_target_platform_capabilities", false]], "get_target_platform_capabilities_sdsp() (in module model_compression_toolkit)": [[19, "model_compression_toolkit.get_target_platform_capabilities_sdsp", false]], "gptqhessianscoresconfig (class in model_compression_toolkit.gptq)": [[4, "model_compression_toolkit.gptq.GPTQHessianScoresConfig", false]], "gradientptqconfig (class in model_compression_toolkit.gptq)": [[4, "model_compression_toolkit.gptq.GradientPTQConfig", false]], "gradualactivationquantizationconfig (class in model_compression_toolkit.gptq)": [[4, "model_compression_toolkit.gptq.GradualActivationQuantizationConfig", false]], "greater (class in model_compression_toolkit.target_platform_capabilities)": [[42, "model_compression_toolkit.target_platform_capabilities.Greater", false]], "greatereq (class in 
model_compression_toolkit.target_platform_capabilities)": [[42, "model_compression_toolkit.target_platform_capabilities.GreaterEq", false]], "imagegranularity (class in model_compression_toolkit.data_generation)": [[1, "model_compression_toolkit.data_generation.ImageGranularity", false]], "imagenormalizationtype (class in model_compression_toolkit.data_generation)": [[1, "model_compression_toolkit.data_generation.ImageNormalizationType", false]], "imagepipelinetype (class in model_compression_toolkit.data_generation)": [[1, "model_compression_toolkit.data_generation.ImagePipelineType", false]], "importance_metric (model_compression_toolkit.pruning.pruningconfig attribute)": [[6, "model_compression_toolkit.pruning.PruningConfig.importance_metric", false]], "importance_scores (model_compression_toolkit.pruning.pruninginfo property)": [[7, "model_compression_toolkit.pruning.PruningInfo.importance_scores", false]], "importancemetric (class in model_compression_toolkit.pruning)": [[6, "model_compression_toolkit.pruning.ImportanceMetric", false]], "insert_preserving_quantizers (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.targetplatformcapabilities attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.TargetPlatformCapabilities.insert_preserving_quantizers", false]], "is_simd_padding (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.targetplatformcapabilities attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.TargetPlatformCapabilities.is_simd_padding", false]], "keras_data_generation_experimental() (in module model_compression_toolkit.data_generation)": [[20, "model_compression_toolkit.data_generation.keras_data_generation_experimental", false]], "keras_export_model (class in model_compression_toolkit.exporter)": [[41, "model_compression_toolkit.exporter.keras_export_model", false]], 
"keras_gradient_post_training_quantization() (in module model_compression_toolkit.gptq)": [[21, "model_compression_toolkit.gptq.keras_gradient_post_training_quantization", false]], "keras_load_quantized_model() (in module model_compression_toolkit)": [[23, "model_compression_toolkit.keras_load_quantized_model", false]], "keras_post_training_quantization() (in module model_compression_toolkit.ptq)": [[24, "model_compression_toolkit.ptq.keras_post_training_quantization", false]], "keras_pruning_experimental() (in module model_compression_toolkit.pruning)": [[25, "model_compression_toolkit.pruning.keras_pruning_experimental", false]], "keras_quantization_aware_training_finalize_experimental() (in module model_compression_toolkit.qat)": [[26, "model_compression_toolkit.qat.keras_quantization_aware_training_finalize_experimental", false]], "keras_quantization_aware_training_init_experimental() (in module model_compression_toolkit.qat)": [[27, "model_compression_toolkit.qat.keras_quantization_aware_training_init_experimental", false]], "keras_resource_utilization_data() (in module model_compression_toolkit.core)": [[22, "model_compression_toolkit.core.keras_resource_utilization_data", false]], "kerasexportserializationformat (class in model_compression_toolkit.exporter)": [[41, "model_compression_toolkit.exporter.KerasExportSerializationFormat", false]], "keys() (model_compression_toolkit.defaultdict method)": [[2, "model_compression_toolkit.DefaultDict.keys", false]], "lut_values_bitwidth (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.attributequantizationconfig attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.AttributeQuantizationConfig.lut_values_bitwidth", false]], "manual_activation_bit_width_selection_list (model_compression_toolkit.core.bitwidthconfig attribute)": [[0, "model_compression_toolkit.core.BitWidthConfig.manual_activation_bit_width_selection_list", false]], 
"manual_weights_bit_width_selection_list (model_compression_toolkit.core.bitwidthconfig attribute)": [[0, "model_compression_toolkit.core.BitWidthConfig.manual_weights_bit_width_selection_list", false]], "manualbitwidthselection (class in model_compression_toolkit.core.common.quantization.bit_width_config)": [[0, "model_compression_toolkit.core.common.quantization.bit_width_config.ManualBitWidthSelection", false]], "mctwrapper (class in model_compression_toolkit.wrapper.mct_wrapper)": [[11, "model_compression_toolkit.wrapper.mct_wrapper.MCTWrapper", false]], "mixedprecisionquantizationconfig (class in model_compression_toolkit.core)": [[5, "model_compression_toolkit.core.MixedPrecisionQuantizationConfig", false]], "mpdistanceweighting (class in model_compression_toolkit.core)": [[5, "model_compression_toolkit.core.MpDistanceWeighting", false]], "mpmetricnormalization (class in model_compression_toolkit.core)": [[5, "model_compression_toolkit.core.MpMetricNormalization", false]], "name (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.fusing attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.Fusing.name", false]], "name (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.operatorsetgroup attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.OperatorSetGroup.name", false]], "name (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.operatorsset attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.OperatorsSet.name", false]], "name (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.targetplatformcapabilities attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.TargetPlatformCapabilities.name", false]], "nodenamefilter (class in 
model_compression_toolkit.core.network_editor)": [[43, "model_compression_toolkit.core.network_editor.NodeNameFilter", false]], "nodenamescopefilter (class in model_compression_toolkit.core.network_editor)": [[43, "model_compression_toolkit.core.network_editor.NodeNameScopeFilter", false]], "nodetypefilter (class in model_compression_toolkit.core.network_editor)": [[43, "model_compression_toolkit.core.network_editor.NodeTypeFilter", false]], "noteq (class in model_compression_toolkit.target_platform_capabilities)": [[42, "model_compression_toolkit.target_platform_capabilities.NotEq", false]], "num_score_approximations (model_compression_toolkit.pruning.pruningconfig attribute)": [[6, "model_compression_toolkit.pruning.PruningConfig.num_score_approximations", false]], "operator_groups (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.fusing attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.Fusing.operator_groups", false]], "operator_set (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.targetplatformcapabilities attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.TargetPlatformCapabilities.operator_set", false]], "operators_set (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.operatorsetgroup attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.OperatorSetGroup.operators_set", false]], "operatorsetgroup (class in model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.OperatorSetGroup", false]], "operatorsset (class in model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.OperatorsSet", false]], 
"opquantizationconfig (class in model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.OpQuantizationConfig", false]], "outputlosstype (class in model_compression_toolkit.data_generation)": [[1, "model_compression_toolkit.data_generation.OutputLossType", false]], "pruning_masks (model_compression_toolkit.pruning.pruninginfo property)": [[7, "model_compression_toolkit.pruning.PruningInfo.pruning_masks", false]], "pruningconfig (class in model_compression_toolkit.pruning)": [[6, "model_compression_toolkit.pruning.PruningConfig", false]], "pruninginfo (class in model_compression_toolkit.pruning)": [[7, "model_compression_toolkit.pruning.PruningInfo", false]], "pytorch_data_generation_experimental() (in module model_compression_toolkit.data_generation)": [[28, "model_compression_toolkit.data_generation.pytorch_data_generation_experimental", false]], "pytorch_export_model (class in model_compression_toolkit.exporter)": [[41, "model_compression_toolkit.exporter.pytorch_export_model", false]], "pytorch_gradient_post_training_quantization() (in module model_compression_toolkit.gptq)": [[29, "model_compression_toolkit.gptq.pytorch_gradient_post_training_quantization", false]], "pytorch_post_training_quantization() (in module model_compression_toolkit.ptq)": [[31, "model_compression_toolkit.ptq.pytorch_post_training_quantization", false]], "pytorch_pruning_experimental() (in module model_compression_toolkit.pruning)": [[32, "model_compression_toolkit.pruning.pytorch_pruning_experimental", false]], "pytorch_quantization_aware_training_finalize_experimental() (in module model_compression_toolkit.qat)": [[33, "model_compression_toolkit.qat.pytorch_quantization_aware_training_finalize_experimental", false]], "pytorch_quantization_aware_training_init_experimental() (in module model_compression_toolkit.qat)": [[34, 
"model_compression_toolkit.qat.pytorch_quantization_aware_training_init_experimental", false]], "pytorch_resource_utilization_data() (in module model_compression_toolkit.core)": [[30, "model_compression_toolkit.core.pytorch_resource_utilization_data", false]], "pytorchexportserializationformat (class in model_compression_toolkit.exporter)": [[41, "model_compression_toolkit.exporter.PytorchExportSerializationFormat", false]], "qatconfig (class in model_compression_toolkit.qat)": [[44, "model_compression_toolkit.qat.QATConfig", false]], "qc_options (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.operatorsset attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.OperatorsSet.qc_options", false]], "qfractionlinearannealingconfig (class in model_compression_toolkit.gptq)": [[4, "model_compression_toolkit.gptq.QFractionLinearAnnealingConfig", false]], "quantization_configurations (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.quantizationconfigoptions attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.QuantizationConfigOptions.quantization_configurations", false]], "quantizationconfig (class in model_compression_toolkit.core)": [[8, "model_compression_toolkit.core.QuantizationConfig", false]], "quantizationconfigoptions (class in model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.QuantizationConfigOptions", false]], "quantizationerrormethod (class in model_compression_toolkit.core)": [[9, "model_compression_toolkit.core.QuantizationErrorMethod", false]], "quantizationformat (class in model_compression_toolkit.exporter)": [[41, "model_compression_toolkit.exporter.QuantizationFormat", false]], "quantizationmethod (class in model_compression_toolkit.target_platform_capabilities)": [[45, 
"model_compression_toolkit.target_platform_capabilities.QuantizationMethod", false]], "quantize_and_export() (model_compression_toolkit.wrapper.mct_wrapper.mctwrapper method)": [[11, "model_compression_toolkit.wrapper.mct_wrapper.MCTWrapper.quantize_and_export", false]], "resourceutilization (class in model_compression_toolkit.core)": [[10, "model_compression_toolkit.core.ResourceUtilization", false]], "roundingtype (class in model_compression_toolkit.gptq)": [[4, "model_compression_toolkit.gptq.RoundingType", false]], "schedulertype (class in model_compression_toolkit.data_generation)": [[1, "model_compression_toolkit.data_generation.SchedulerType", false]], "schema_version (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.targetplatformcapabilities attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.TargetPlatformCapabilities.SCHEMA_VERSION", false]], "set_log_folder() (in module model_compression_toolkit)": [[35, "model_compression_toolkit.set_log_folder", false]], "smaller (class in model_compression_toolkit.target_platform_capabilities)": [[42, "model_compression_toolkit.target_platform_capabilities.Smaller", false]], "smallereq (class in model_compression_toolkit.target_platform_capabilities)": [[42, "model_compression_toolkit.target_platform_capabilities.SmallerEq", false]], "targetplatformcapabilities (class in model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.TargetPlatformCapabilities", false]], "tpc_minor_version (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.targetplatformcapabilities attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.TargetPlatformCapabilities.tpc_minor_version", false]], "tpc_patch_version 
(model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.targetplatformcapabilities attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.TargetPlatformCapabilities.tpc_patch_version", false]], "tpc_platform_type (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.targetplatformcapabilities attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.TargetPlatformCapabilities.tpc_platform_type", false]], "trainablequantizeractivationconfig (class in model_compression_toolkit.trainable_infrastructure)": [[46, "model_compression_toolkit.trainable_infrastructure.TrainableQuantizerActivationConfig", false]], "trainablequantizerweightsconfig (class in model_compression_toolkit.trainable_infrastructure)": [[46, "model_compression_toolkit.trainable_infrastructure.TrainableQuantizerWeightsConfig", false]], "trainingmethod (class in model_compression_toolkit.trainable_infrastructure)": [[46, "model_compression_toolkit.trainable_infrastructure.TrainingMethod", false]], "type (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.operatorsset attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.OperatorsSet.type", false]], "weights_n_bits (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.attributequantizationconfig attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.AttributeQuantizationConfig.weights_n_bits", false]], "weights_per_channel_threshold (model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.attributequantizationconfig attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.AttributeQuantizationConfig.weights_per_channel_threshold", false]], "weights_quantization_method 
(model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.attributequantizationconfig attribute)": [[45, "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.AttributeQuantizationConfig.weights_quantization_method", false]], "xquant_report_keras_experimental() (in module model_compression_toolkit.xquant.keras.facade_xquant_report)": [[36, "model_compression_toolkit.xquant.keras.facade_xquant_report.xquant_report_keras_experimental", false]], "xquant_report_pytorch_experimental() (in module model_compression_toolkit.xquant.pytorch.facade_xquant_report)": [[37, "model_compression_toolkit.xquant.pytorch.facade_xquant_report.xquant_report_pytorch_experimental", false]], "xquant_report_troubleshoot_pytorch_experimental() (in module model_compression_toolkit.xquant.pytorch.facade_xquant_report)": [[38, "model_compression_toolkit.xquant.pytorch.facade_xquant_report.xquant_report_troubleshoot_pytorch_experimental", false]], "xquantconfig (class in model_compression_toolkit.xquant.common.xquant_config)": [[12, "model_compression_toolkit.xquant.common.xquant_config.XQuantConfig", false]]}, "objects": {"model_compression_toolkit": [[2, 0, 1, "", "DefaultDict"], [18, 3, 1, "", "get_target_platform_capabilities"], [19, 3, 1, "", "get_target_platform_capabilities_sdsp"], [23, 3, 1, "", "keras_load_quantized_model"], [35, 3, 1, "", "set_log_folder"]], "model_compression_toolkit.DefaultDict": [[2, 1, 1, "", "get"], [2, 1, 1, "", "keys"]], "model_compression_toolkit.core": [[0, 0, 1, "", "BitWidthConfig"], [3, 0, 1, "", "ChannelAxis"], [39, 0, 1, "", "CoreConfig"], [40, 0, 1, "", "DebugConfig"], [3, 0, 1, "", "FrameworkInfo"], [5, 0, 1, "", "MixedPrecisionQuantizationConfig"], [5, 0, 1, "", "MpDistanceWeighting"], [5, 0, 1, "", "MpMetricNormalization"], [8, 0, 1, "", "QuantizationConfig"], [9, 0, 1, "", "QuantizationErrorMethod"], [10, 0, 1, "", "ResourceUtilization"], [22, 3, 1, "", "keras_resource_utilization_data"], [30, 3, 1, 
"", "pytorch_resource_utilization_data"]], "model_compression_toolkit.core.BitWidthConfig": [[0, 2, 1, "", "manual_activation_bit_width_selection_list"], [0, 2, 1, "", "manual_weights_bit_width_selection_list"]], "model_compression_toolkit.core.common.quantization.bit_width_config": [[0, 0, 1, "", "ManualBitWidthSelection"]], "model_compression_toolkit.core.common.quantization.bit_width_config.ManualBitWidthSelection": [[0, 2, 1, "", "bit_width"], [0, 2, 1, "", "filter"]], "model_compression_toolkit.core.network_editor": [[43, 0, 1, "", "ChangeCandidatesActivationQuantConfigAttr"], [43, 0, 1, "", "ChangeCandidatesActivationQuantizationMethod"], [43, 0, 1, "", "ChangeCandidatesWeightsQuantConfigAttr"], [43, 0, 1, "", "ChangeCandidatesWeightsQuantizationMethod"], [43, 0, 1, "", "ChangeFinalActivationQuantConfigAttr"], [43, 0, 1, "", "ChangeFinalWeightsQuantConfigAttr"], [43, 0, 1, "", "ChangeFinalWeightsQuantizationMethod"], [43, 0, 1, "", "ChangeQuantizationParamFunction"], [43, 0, 1, "", "EditRule"], [43, 0, 1, "", "NodeNameFilter"], [43, 0, 1, "", "NodeNameScopeFilter"], [43, 0, 1, "", "NodeTypeFilter"]], "model_compression_toolkit.data_generation": [[1, 0, 1, "", "BNLayerWeightingType"], [1, 0, 1, "", "BatchNormAlignemntLossType"], [1, 0, 1, "", "DataGenerationConfig"], [1, 0, 1, "", "DataInitType"], [1, 0, 1, "", "ImageGranularity"], [1, 0, 1, "", "ImageNormalizationType"], [1, 0, 1, "", "ImagePipelineType"], [1, 0, 1, "", "OutputLossType"], [1, 0, 1, "", "SchedulerType"], [14, 3, 1, "", "get_keras_data_generation_config"], [16, 3, 1, "", "get_pytorch_data_generation_config"], [20, 3, 1, "", "keras_data_generation_experimental"], [28, 3, 1, "", "pytorch_data_generation_experimental"]], "model_compression_toolkit.exporter": [[41, 0, 1, "", "KerasExportSerializationFormat"], [41, 0, 1, "", "PytorchExportSerializationFormat"], [41, 0, 1, "", "QuantizationFormat"], [41, 0, 1, "", "keras_export_model"], [41, 0, 1, "", "pytorch_export_model"]], 
"model_compression_toolkit.gptq": [[4, 0, 1, "", "GPTQHessianScoresConfig"], [4, 0, 1, "", "GradientPTQConfig"], [4, 0, 1, "", "GradualActivationQuantizationConfig"], [4, 0, 1, "", "QFractionLinearAnnealingConfig"], [4, 0, 1, "", "RoundingType"], [15, 3, 1, "", "get_keras_gptq_config"], [17, 3, 1, "", "get_pytorch_gptq_config"], [21, 3, 1, "", "keras_gradient_post_training_quantization"], [29, 3, 1, "", "pytorch_gradient_post_training_quantization"]], "model_compression_toolkit.pruning": [[6, 0, 1, "", "ChannelsFilteringStrategy"], [6, 0, 1, "", "ImportanceMetric"], [6, 0, 1, "", "PruningConfig"], [7, 0, 1, "", "PruningInfo"], [25, 3, 1, "", "keras_pruning_experimental"], [32, 3, 1, "", "pytorch_pruning_experimental"]], "model_compression_toolkit.pruning.PruningConfig": [[6, 2, 1, "", "channels_filtering_strategy"], [6, 2, 1, "", "importance_metric"], [6, 2, 1, "", "num_score_approximations"]], "model_compression_toolkit.pruning.PruningInfo": [[7, 4, 1, "", "importance_scores"], [7, 4, 1, "", "pruning_masks"]], "model_compression_toolkit.ptq": [[24, 3, 1, "", "keras_post_training_quantization"], [31, 3, 1, "", "pytorch_post_training_quantization"]], "model_compression_toolkit.qat": [[44, 0, 1, "", "QATConfig"], [26, 3, 1, "", "keras_quantization_aware_training_finalize_experimental"], [27, 3, 1, "", "keras_quantization_aware_training_init_experimental"], [33, 3, 1, "", "pytorch_quantization_aware_training_finalize_experimental"], [34, 3, 1, "", "pytorch_quantization_aware_training_init_experimental"]], "model_compression_toolkit.target_platform_capabilities": [[42, 0, 1, "", "AttributeFilter"], [42, 0, 1, "", "Eq"], [42, 0, 1, "", "Greater"], [42, 0, 1, "", "GreaterEq"], [42, 0, 1, "", "NotEq"], [45, 0, 1, "", "QuantizationMethod"], [42, 0, 1, "", "Smaller"], [42, 0, 1, "", "SmallerEq"]], "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema": [[45, 0, 1, "", "AttributeQuantizationConfig"], [45, 0, 1, "", "Fusing"], [45, 0, 1, "", 
"OpQuantizationConfig"], [45, 0, 1, "", "OperatorSetGroup"], [45, 0, 1, "", "OperatorsSet"], [45, 0, 1, "", "QuantizationConfigOptions"], [45, 0, 1, "", "TargetPlatformCapabilities"]], "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.AttributeQuantizationConfig": [[45, 2, 1, "", "enable_weights_quantization"], [45, 2, 1, "", "lut_values_bitwidth"], [45, 2, 1, "", "weights_n_bits"], [45, 2, 1, "", "weights_per_channel_threshold"], [45, 2, 1, "", "weights_quantization_method"]], "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.Fusing": [[45, 2, 1, "", "fuse_op_quantization_config"], [45, 2, 1, "", "name"], [45, 2, 1, "", "operator_groups"]], "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.OperatorSetGroup": [[45, 2, 1, "", "name"], [45, 2, 1, "", "operators_set"]], "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.OperatorsSet": [[45, 2, 1, "", "name"], [45, 2, 1, "", "qc_options"], [45, 2, 1, "", "type"]], "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.QuantizationConfigOptions": [[45, 2, 1, "", "base_config"], [45, 2, 1, "", "quantization_configurations"]], "model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema.TargetPlatformCapabilities": [[45, 2, 1, "", "SCHEMA_VERSION"], [45, 2, 1, "", "add_metadata"], [45, 2, 1, "", "default_qco"], [45, 2, 1, "", "fusing_patterns"], [45, 2, 1, "", "insert_preserving_quantizers"], [45, 2, 1, "", "is_simd_padding"], [45, 2, 1, "", "name"], [45, 2, 1, "", "operator_set"], [45, 2, 1, "", "tpc_minor_version"], [45, 2, 1, "", "tpc_patch_version"], [45, 2, 1, "", "tpc_platform_type"]], "model_compression_toolkit.trainable_infrastructure": [[46, 0, 1, "", "BaseKerasTrainableQuantizer"], [46, 0, 1, "", "BasePytorchTrainableQuantizer"], [46, 0, 1, "", "TrainableQuantizerActivationConfig"], [46, 0, 1, "", "TrainableQuantizerWeightsConfig"], [46, 0, 
1, "", "TrainingMethod"]], "model_compression_toolkit.wrapper.mct_wrapper": [[11, 0, 1, "", "MCTWrapper"]], "model_compression_toolkit.wrapper.mct_wrapper.MCTWrapper": [[11, 1, 1, "", "quantize_and_export"]], "model_compression_toolkit.xquant.common.xquant_config": [[12, 0, 1, "", "XQuantConfig"]], "model_compression_toolkit.xquant.keras.facade_xquant_report": [[36, 3, 1, "", "xquant_report_keras_experimental"]], "model_compression_toolkit.xquant.pytorch.facade_xquant_report": [[37, 3, 1, "", "xquant_report_pytorch_experimental"], [38, 3, 1, "", "xquant_report_troubleshoot_pytorch_experimental"]]}, "objnames": {"0": ["py", "class", "Python class"], "1": ["py", "method", "Python method"], "2": ["py", "attribute", "Python attribute"], "3": ["py", "function", "Python function"], "4": ["py", "property", "Python property"]}, "objtypes": {"0": "py:class", "1": "py:method", "2": "py:attribute", "3": "py:function", "4": "py:property"}, "terms": {"": [3, 6, 8, 10, 21, 24, 25, 26, 27, 29, 31, 32, 34, 35, 41, 42, 43, 45, 46, 48, 50], "0": [1, 3, 4, 5, 7, 8, 11, 12, 14, 16, 21, 24, 25, 26, 27, 32, 40, 41, 46, 48], "05": 8, "06": 5, "08153": 46, "1": [1, 3, 4, 5, 7, 8, 11, 12, 17, 20, 21, 22, 24, 25, 26, 28, 29, 30, 31, 32, 33, 40, 41, 48, 50], "10": [20, 21, 24, 27, 28, 29, 31, 34], "100": 40, "10000000000": 5, "14": 11, "15": 41, "16": [12, 41, 48], "1902": 46, "1e": [5, 15, 17], "1st": 15, "2": [3, 8, 12, 15, 17, 20, 28, 40, 45, 46, 48, 50], "20": 49, "2021": 50, "2023": 50, "224": [21, 22, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34, 41], "2f": 40, "2nd": 15, "3": [3, 11, 15, 17, 20, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 41, 46], "32": [4, 5, 11], "3e": [15, 17], "3rd": 15, "4": [15, 17, 20, 21, 24, 25, 27, 28, 29, 31, 32, 34, 48], "4th": 15, "5": [11, 12, 15, 17, 25, 32, 48], "50": [25, 32], "52587890625e": 8, "6": [28, 40], "75": [11, 21, 24, 26, 27], "8": [20, 21, 24, 26, 27, 28, 41, 45, 46], "9": 43, "A": [0, 3, 4, 5, 7, 8, 13, 15, 17, 21, 22, 23, 24, 25, 26, 
27, 29, 30, 31, 32, 33, 34, 36, 37, 38, 39, 40, 43, 44, 45, 50], "And": 48, "As": [5, 48, 49], "By": [4, 5, 11, 25, 29, 31, 32, 41, 49], "For": [3, 8, 12, 18, 19, 20, 21, 24, 26, 27, 28, 34, 41, 45, 46, 47, 48, 49, 50], "If": [2, 3, 4, 5, 12, 15, 17, 21, 24, 26, 27, 29, 31, 39, 40, 41, 42, 45, 48], "In": [5, 20, 21, 24, 27, 28, 29, 31, 34, 41, 42, 44, 48], "It": [2, 11, 12, 45, 46, 48], "No": 1, "One": 49, "The": [0, 1, 3, 4, 5, 6, 7, 10, 11, 12, 14, 15, 16, 17, 18, 19, 20, 21, 24, 25, 26, 27, 28, 29, 31, 32, 34, 36, 37, 38, 40, 41, 43, 45, 46, 48, 49], "Then": [3, 21, 24, 27, 29, 31, 34, 43, 49], "There": [41, 48, 49], "These": [48, 49], "To": [41, 48, 49], "With": 48, "_": [21, 24, 27, 29, 31, 34, 41], "__call__": 40, "__import__": 40, "__init__": 40, "_input_data": 41, "_model_input_nam": 41, "_model_output_nam": 41, "_with_model_output_loss_object": 48, "about": [3, 4, 7, 13, 15, 17, 21, 24, 26, 27, 40, 41, 45, 46], "abov": [12, 48], "absolut": 9, "abstract": [13, 46], "accept": [15, 40, 45], "access": 7, "accord": [13, 21, 22, 24, 25, 27, 29, 30, 31, 32, 34, 41, 42], "accordingli": 45, "accuraci": [12, 48], "achiev": 25, "act": 7, "act_hessian_default_batch_s": [15, 17], "action": 40, "activ": [0, 3, 4, 5, 8, 10, 11, 21, 22, 24, 27, 29, 30, 31, 34, 41, 43, 44, 45, 46, 48, 49], "activation_bias_correct": 8, "activation_bias_correction_threshold": 8, "activation_channel_equ": 8, "activation_error_method": [8, 11], "activation_memori": 10, "activation_min_max_map": 3, "activation_n_bit": [45, 46], "activation_op": 3, "activation_quantization_candid": 46, "activation_quantization_method": [43, 45, 46], "activation_quantization_param": 46, "activation_quantization_params_fn": 43, "activation_quantizer_map": 3, "activation_quantizer_params_overrid": 44, "activation_training_method": 44, "ad": 45, "adam": [14, 15, 17], "add": [1, 3, 12, 14, 16, 23, 46], "add_metadata": 45, "addit": [23, 41, 48], "address": 45, "advanc": 3, "affect": [21, 24, 26, 27], "after": [13, 
21, 23, 24, 27, 34, 48, 50], "aim": [25, 32], "algorithm": 5, "align": [1, 14, 16], "all": [1, 3, 4, 5, 8, 43, 46, 49], "allimag": [1, 16], "allow": [6, 12, 20, 28, 40, 41, 45], "along": 49, "also": [25, 32, 45], "an": [1, 2, 3, 4, 7, 11, 13, 21, 24, 27, 34, 36, 37, 38, 40, 41, 42, 43, 45, 46, 48, 50], "analysi": [25, 32], "analyz": [25, 32, 38], "analyze_similar": 40, "ani": [1, 2, 3, 5, 11, 36, 37, 38, 41, 42, 46], "anneal": 4, "api": [3, 4, 24, 27, 34, 44, 48], "append": 40, "appli": [0, 1, 5, 8, 13, 41, 42, 43, 45, 48], "applic": [21, 22, 24, 25, 26, 27, 41], "approach": 6, "appropri": 48, "approxim": [6, 25, 32], "ar": [3, 5, 12, 18, 19, 21, 24, 25, 27, 29, 31, 32, 34, 40, 41, 45, 46, 47, 48, 49], "architectur": [25, 32], "argument": [4, 40, 41, 45], "arrai": [7, 11], "art": 50, "arxiv": [46, 50], "assess": [25, 32], "associ": [25, 32], "assum": [25, 32], "astyp": 41, "attent": [4, 15, 17, 46], "attirbut": 3, "attr": 42, "attr_nam": 43, "attr_valu": 43, "attr_weights_configs_map": 45, "attribut": [43, 45, 46], "attributefilt": 42, "auto": 13, "automat": 48, "auxiliari": [15, 17], "avail": 41, "averag": [1, 5, 14, 15, 16, 17, 48], "avg": 5, "awar": [13, 44, 46, 50], "axi": [3, 46, 48], "backend": 45, "bar": 40, "base": [1, 4, 5, 8, 9, 11, 13, 15, 17, 18, 19, 20, 25, 28, 31, 32, 46, 48, 50], "base_config": 45, "basenod": 7, "basenodematch": 0, "basic": 46, "batch": [1, 4, 5, 14, 15, 16, 17, 20, 21, 24, 27, 28, 29, 31, 34], "batchnorm": [1, 14, 16, 20, 21, 24, 27, 29, 31, 34], "batchnorm2d": 28, "batchnormalignemntlosstyp": [14, 16], "batchwis": [1, 14], "been": [7, 40], "begin": 4, "behavior": [40, 48], "being": [21, 24, 27, 29, 31, 34, 40, 45, 46], "below": [12, 48], "between": [4, 5, 12, 21, 29, 31, 45, 48, 49], "bia": [4, 11, 15, 17, 21, 24, 26, 27], "bidwidth": 5, "bit": [0, 5, 10, 13, 21, 24, 26, 27, 34, 39, 41, 43, 45, 46, 50], "bit_width": 0, "bit_width_config": [0, 39], "bitwidth": [5, 12, 21, 24, 26, 27, 48], "bitwidthconfig": [13, 39], "block": [46, 
49], "bn_alignment_loss_typ": [1, 14, 16], "bn_layer_typ": [1, 14, 16], "bnlayerweightingtyp": [14, 16], "bool": [1, 4, 5, 11, 12, 14, 15, 16, 17, 40, 45, 46], "boolean": 23, "bop": 10, "both": [11, 21, 24, 29, 31, 33, 46, 49], "build": [22, 30, 46, 50], "built": [27, 34, 46], "bypass": 40, "byte": [10, 21, 24, 25, 27, 32, 34, 49], "c": [12, 48], "calcul": [5, 6, 13, 21, 22, 24, 25, 27, 29, 30, 31, 32, 34, 48], "calibr": [11, 21, 22, 24, 27, 29, 30, 31, 34], "call": [22, 30, 35, 45, 49], "callabl": [3, 5, 11, 12, 15, 17, 21, 22, 24, 25, 27, 29, 30, 31, 32, 34, 36, 37, 38, 40, 41, 42], "callback": 40, "can": [3, 4, 8, 11, 13, 15, 17, 20, 22, 25, 28, 30, 32, 40, 41, 43, 45, 46, 48, 49, 50], "candid": [5, 21, 24, 26, 27, 43], "cannot": 45, "capabl": [11, 18, 19, 25, 30, 32], "case": 5, "caus": [12, 13, 38, 48], "chang": [20, 28, 41, 43, 48, 49], "changecandidatesactivationquantconfigattr": 43, "changecandidatesactivationquantizationmethod": 43, "changecandidatesweightsquantconfigattr": 43, "changecandidatesweightsquantizationmethod": 43, "changefinalactivationquantconfigattr": 43, "changefinalweightsquantconfigattr": 43, "changefinalweightsquantizationmethod": 43, "changequantizationmethod": 43, "changequantizationparamfunct": 43, "channel": [3, 6, 7, 13, 25, 32, 45, 46, 49], "channels_filtering_strategi": 6, "check": [5, 41, 42, 43], "choos": [1, 4, 41], "chosen": 49, "circl": 48, "class": [0, 1, 5, 6, 7, 8, 9, 10, 11, 12, 13, 23, 39, 40, 41, 42, 43, 44, 45, 46], "clibrat": 31, "click": 49, "clip": [1, 14, 16], "clone": 50, "coeffici": [3, 21, 24, 26, 27, 29, 31, 45, 46], "cohen": 50, "collaps": 11, "collect": [3, 21, 24, 27, 29, 31, 34, 36, 37, 38, 49], "com": 50, "combin": 45, "common": [0, 12], "compar": [5, 21, 29, 31, 48, 49], "comparison": 50, "compat": 41, "compil": 23, "complet": [4, 11, 40], "completedcompon": 40, "compon": [40, 45, 46, 48], "component_nam": 40, "compress": [11, 13, 20, 25, 28, 29, 32, 48], "comput": [3, 4, 5, 9, 12, 13, 15, 17, 22, 30, 36, 
40, 49], "compute_distance_fn": 5, "concat_threshold_upd": 8, "concaten": [12, 45, 48], "concatn": [12, 48], "config": [4, 20, 21, 24, 25, 26, 27, 28, 29, 32, 33, 34, 39, 43, 46], "configur": [0, 4, 5, 8, 10, 11, 13, 14, 15, 16, 17, 20, 21, 24, 25, 26, 27, 28, 29, 31, 32, 33, 34, 36, 37, 38, 39, 40, 42, 43, 44, 45, 46, 48, 50], "configuration_overwrit": 5, "confirm": 48, "connect": 11, "consid": [6, 14, 16, 25, 32, 45], "consol": 48, "constant": [6, 43, 46], "constraint": [21, 24, 25, 29, 31, 32], "contain": [7, 13, 20, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 36, 37, 38, 46, 48], "conv2d": [3, 20, 21, 24, 26, 27, 28, 43, 45], "conveni": 35, "convent": 48, "convert": [11, 13, 26, 33, 45], "core": [0, 3, 5, 8, 9, 10, 11, 21, 22, 24, 25, 26, 27, 29, 30, 32, 33, 34, 39, 40, 43], "core_config": [21, 22, 24, 26, 27, 29, 30, 31, 33, 34, 40], "coreconfig": [13, 21, 22, 24, 26, 27, 29, 30, 31, 33, 34, 40], "correct": 11, "correspond": [7, 48], "cosin": [48, 50], "count_param": [21, 24, 25, 26, 27], "countermeasur": 48, "cpuexecutionprovid": 41, "creat": [3, 4, 8, 11, 13, 14, 15, 16, 17, 20, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 40, 41, 42, 43, 45, 48], "creation": 41, "crop": 1, "cudaexecutionprovid": 41, "cui": 40, "current": [4, 40, 41], "currentcompon": 40, "custom": [5, 12, 20, 23, 27, 28, 41], "custom_metric_fn": 5, "custom_object": [23, 26, 27], "custom_similarity_metr": 12, "custom_tpc_opset_to_lay": 8, "cut": 40, "dash": 48, "data": [13, 14, 16, 22, 25, 30, 32, 36, 37, 38, 41, 45, 49, 50], "data_gen_batch_s": [1, 14, 16, 20, 28], "data_gener": [1, 14, 16, 20, 28], "data_generation_config": [20, 28], "data_init_typ": [1, 14, 16], "dataclass": [39, 40], "datagenerationconfig": [1, 13, 20, 28], "datainittyp": [14, 16], "dataset": [4, 11, 15, 17, 21, 22, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34, 36, 37, 38, 41, 48, 49], "debug": [39, 40], "debug_config": 39, "debugconfig": 39, "deeper": 49, "def": [21, 22, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34, 
40, 41], "default": [1, 2, 4, 5, 6, 11, 14, 15, 16, 17, 21, 24, 25, 29, 31, 32, 39, 41, 44, 45, 49], "default_data_gen_b": [14, 16], "default_factori": 2, "default_keras_extra_pixel": 14, "default_keras_initial_lr": 14, "default_keras_output_loss_multipli": 14, "default_keras_tpc": [21, 24, 25, 27], "default_n_it": [14, 16], "default_onnx_opset_vers": 41, "default_pytorch_bn_layer_typ": 16, "default_pytorch_extra_pixel": 16, "default_pytorch_initial_lr": 16, "default_pytorch_last_layer_typ": 16, "default_pytorch_output_loss_multipli": 16, "default_pytorch_tpc": [29, 31, 32, 34], "default_qco": 45, "default_valu": 2, "default_weight_attr_config": 45, "defaultdict": [3, 13], "defin": [0, 4, 5, 15, 17, 20, 21, 24, 25, 26, 27, 28, 29, 31, 32, 40, 45, 46, 48], "degrad": [12, 13, 38, 48], "demonstr": [41, 45], "dens": [3, 20], "dense_nparam": [25, 32], "depend": [1, 21, 24, 27, 29, 31, 34], "describ": 48, "descript": [11, 40], "desir": [13, 21, 22, 24, 26, 27, 29, 30, 31, 34], "detail": [41, 45, 48], "detect": [12, 13, 38, 48], "determin": [6, 25, 32, 45], "develop": 50, "deviat": 48, "devic": [13, 18], "device_typ": 18, "diagram": 45, "diamant": 50, "dict": [3, 7, 12, 36, 37, 38, 41, 45, 46, 48], "dictionari": [2, 3, 4, 12, 26, 27, 36, 37, 38, 41, 43, 44, 46], "differ": [1, 8, 13, 21, 24, 26, 27, 41, 45, 48, 49], "dikstein": 50, "dir": [12, 48, 49], "directori": [12, 13, 35, 48], "disabl": [15, 17, 40], "displai": [40, 48, 49], "distanc": [5, 11], "distance_weighting_method": [5, 11], "distil": [4, 50], "distribut": 9, "diverg": [9, 49], "divers": 1, "divid": 3, "divis": 49, "dnn": 46, "do": [1, 48, 49], "document": [13, 24, 27, 34, 48], "doe": 48, "doesn": 50, "don": 35, "done": 49, "dot": 49, "dqa": 46, "dror": 50, "dtype": 41, "dummi": 17, "durat": [25, 32], "dure": [4, 13, 14, 15, 16, 17, 18, 19, 36, 37, 38, 41, 43, 45, 46, 47, 49], "e": [3, 5, 11, 21, 24, 27, 29, 31, 34, 50], "each": [5, 6, 7, 12, 21, 24, 25, 27, 29, 31, 32, 34, 43, 45, 46, 48, 49], "easi": 48, 
"easili": [13, 50], "edit": [39, 40, 43], "editrul": 40, "either": 45, "element": [7, 45], "empti": 2, "emul": 46, "enabl": [1, 5, 8, 11, 13, 15, 17, 40, 46, 50], "enable_activation_quant": [45, 46], "enable_weights_quant": [45, 46], "encapsul": [0, 8], "end_step": 4, "engin": 50, "enhanc": 50, "ensur": 5, "entir": 13, "enum": [1, 3, 4, 6, 9, 46], "epoch": [4, 11, 15, 17], "epsilon": 5, "eptq": 50, "eq": 42, "equal": 42, "er_list": 43, "error": [9, 11, 12, 40], "estim": [4, 46], "etc": [3, 10, 13, 21, 24, 27, 29, 31, 34, 49], "euclidean": 49, "evalu": [5, 36, 37, 38], "even": 48, "exact": 17, "exampl": [3, 8, 11, 15, 17, 20, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 40, 43, 45, 46, 50], "exceed": 48, "execut": 48, "exist": [2, 43, 48], "exp": 5, "exp_distance_weighting_sigma": 5, "expect": [4, 49], "experiment": [13, 20, 28, 50], "explain": [12, 13, 36, 37, 38, 46], "explicitli": 45, "expon": 5, "exponenti": 5, "export": 11, "extend": [25, 32], "extens": [11, 41, 50], "extra": [1, 14, 16], "extra_pixel": [1, 14, 16], "extrem": 48, "f": 40, "facade_xquant_report": [36, 37, 38], "factor": [4, 5, 9, 15, 17], "factori": [0, 4, 39, 40], "fake": 41, "fake_qu": [27, 34], "fakely_qu": 41, "fallback": 45, "fals": [4, 5, 8, 11, 12, 14, 15, 17, 40, 46], "familiar": 48, "featur": 40, "fetch": 45, "few": [49, 50], "field": [18, 19, 42, 45, 47], "figur": [40, 49], "file": [23, 26, 27, 35, 40, 41], "filepath": 23, "filter": [0, 1, 6], "final": [4, 5, 12, 13, 20, 28, 43, 48, 49, 50], "find": [21, 24, 27, 34], "fine": [15, 17, 25, 26, 27, 32, 33, 34], "first": [1, 21, 24, 27, 29, 31, 34, 41, 49], "first_layer_multipli": 1, "fix": 45, "fixed_scal": [18, 19, 45, 47], "fixed_zero_point": [18, 19, 45, 47], "flag": [1, 11, 40, 45], "flatten": [20, 28], "flip": 1, "float": [1, 4, 5, 11, 12, 14, 15, 16, 17, 21, 27, 29, 31, 34, 36, 37, 38, 41, 45, 46, 48, 49], "float32": [25, 32, 41], "float_model": [11, 36, 37, 38, 41, 48], "flush": 40, "fold": [21, 24, 27, 29, 31, 34], 
"folder": [35, 48], "follow": [3, 4, 11, 12, 40, 46, 48, 49], "footprint": [25, 32], "form": 45, "format": [3, 13], "fraction": 4, "framework": [3, 11, 46], "frameworkquantizationcap": [22, 29, 30, 31], "free": [6, 20, 25, 28, 32, 50], "freez": 46, "freeze_quant_param": 46, "friendli": [25, 32, 50], "from": [3, 4, 11, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 40, 41, 43, 45, 46, 47, 48, 49, 50], "from_config": 46, "function": [3, 4, 5, 11, 12, 13, 14, 15, 16, 17, 20, 23, 25, 28, 32, 35, 40, 43, 45, 46, 48], "fuse_op_quantization_config": 45, "fusing_pattern": 45, "futur": [18, 19, 20, 28, 45, 47], "g": [3, 11, 21, 24, 27, 29, 31, 34], "gather": [45, 49], "gaussian": [1, 14, 16], "gener": [2, 12, 13, 14, 16, 21, 22, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34, 36, 37, 38, 45, 49, 50], "generated_imag": [20, 28], "get": [2, 3, 4, 5, 13, 21, 24, 26, 27, 29, 31, 33, 34, 45, 49], "get_config": 46, "get_input": 41, "get_keras_data_generation_config": [13, 14, 20], "get_keras_gptq_config": [11, 13, 15, 21], "get_ort_session_opt": 41, "get_output": 41, "get_pytorch_data_generation_config": [13, 16, 28], "get_pytorch_gptq_config": [11, 13, 17], "get_target_platform_cap": [13, 18, 45], "get_target_platform_capabilities_sdsp": [13, 19, 45], "git": 50, "github": [41, 50], "given": [2, 21, 22, 24, 27, 29, 30, 31, 34], "gordon": 50, "gptq": [4, 11, 15, 17, 21, 29], "gptq_conf": [15, 17, 29], "gptq_config": [21, 29, 31], "gptq_quantizer_params_overrid": 4, "gptq_representative_data_gen": [21, 29], "grad": 1, "gradient": [1, 4, 11, 13, 31, 50], "gradientptq": [4, 13], "gradientptqconfig": [13, 21, 29], "gradual": 4, "gradual_activation_quant": [15, 17], "gradual_activation_quantization_config": 4, "gradualactivationquant": [15, 17], "gradualactivationquantizationconfig": [15, 17], "granular": [1, 14, 16], "graph": [22, 30, 43, 49], "greater": 42, "greatereq": 42, "greedi": [5, 6], "group": [3, 6, 25, 32, 45], "h": 50, "ha": [7, 40, 41, 42, 43], "habi": 50, 
"handl": [11, 21, 24, 27, 29, 31, 34], "handler": 35, "hardwar": [13, 25, 32, 45, 46, 50], "have": [3, 41, 42, 48, 49], "henc": 45, "here": [12, 25, 32, 41, 45, 48, 50], "hessian": [4, 5, 6, 9, 11, 15, 17, 25, 32, 50], "hessian_batch_s": [4, 5, 15, 17], "hessian_weights_config": 4, "hessians_num_sampl": 4, "higher": [25, 32], "highlight": 48, "hight": 28, "histogram": [21, 24, 27, 29, 31, 34, 49], "histori": 40, "hmse": 9, "hold": [3, 39, 42, 45], "holder": 46, "how": [3, 6, 21, 22, 24, 27, 29, 31, 34, 40, 41, 46, 50], "howev": 41, "hptq": [45, 50], "http": [46, 50], "hw": 22, "i": [1, 2, 3, 4, 5, 6, 7, 9, 11, 12, 13, 15, 17, 20, 21, 24, 25, 26, 27, 28, 29, 31, 32, 34, 35, 39, 40, 41, 42, 43, 45, 46, 48, 49, 50], "ident": [1, 5], "identifi": [25, 32, 45, 48], "ignor": [18, 19, 45, 47], "ilp": [21, 24, 27, 34], "imag": [1, 4, 5, 11, 14, 16, 20, 21, 24, 27, 28, 29, 31, 34, 48, 49], "image_clip": [1, 14, 16], "image_granular": [1, 14, 16], "image_normalization_typ": [1, 14, 16], "image_pipeline_typ": [1, 14, 16], "imagegranular": [14, 16], "imagenet": 1, "imagenet1k_v1": 32, "imagenormalizationtyp": [14, 16], "imagepipelinetyp": [14, 16], "imagewis": 1, "impact": [25, 32], "implement": [12, 46], "implment": 46, "import": [3, 6, 7, 8, 11, 13, 15, 17, 20, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 40, 41, 43, 46, 48, 49], "importance_metr": 6, "importance_scor": 7, "improv": [5, 25, 32, 48], "imx500": [11, 41, 45], "imx500_tp_model": 18, "in_model": [21, 22, 24, 26, 27, 30, 33, 34], "in_modul": [31, 48], "includ": [4, 7, 11, 21, 24, 27, 29, 31, 34, 45, 46], "increas": [4, 5], "index": [3, 13], "indic": [3, 7, 25, 32, 45, 48], "individu": 48, "induc": 9, "inf": [8, 10, 11], "infer": [13, 26, 33, 45, 46], "inferablequant": [26, 33], "inferencesess": 41, "info": [6, 35, 40], "inform": [3, 4, 13, 15, 17, 18, 19, 21, 24, 25, 27, 29, 31, 32, 34, 40, 45, 46, 47], "infrastructur": 46, "init": [13, 43, 50], "initi": [1, 2, 4, 6, 11, 12, 14, 16, 27, 34, 46, 48], 
"initial_lr": [1, 14, 16], "initial_q_fract": 4, "inner": 2, "input": [1, 5, 11, 14, 16, 21, 24, 27, 29, 31, 34, 40, 45, 48], "input_sc": 8, "input_shap": 20, "insert": 49, "insert_preserving_quant": 45, "instal": 41, "instanc": [4, 11, 13, 15, 17, 43, 45, 49], "instanti": [4, 8, 44], "instruct": 45, "insuffici": [12, 48], "int": [0, 1, 4, 5, 6, 12, 14, 15, 16, 17, 20, 28, 35, 40, 41, 45, 46, 48], "int8": 41, "integ": [5, 41, 45], "interest": 5, "interfac": [4, 11, 17], "introduc": 46, "inverse_min_max_diff": 1, "involv": [20, 25, 28, 32], "is_detect_under_threshold_quantize_error": 12, "is_keras_layer_export": 41, "is_layer_exportable_fn": 41, "is_pytorch_layer_export": 41, "is_simd_pad": 45, "issu": [5, 41, 48], "item": 48, "iter": [1, 14, 16, 20, 21, 24, 27, 28, 29, 31, 34], "its": [2, 3, 11, 13, 23, 25, 32, 42, 45, 49], "jen": 50, "judg": [12, 13, 38, 48], "judgment": 48, "just": 50, "keep": [33, 40, 50], "kei": [2, 11, 12, 25, 32, 42], "kept": [7, 27, 34], "ker": 27, "kera": [3, 11, 13, 43, 46, 50], "keras_appl": [1, 14], "keras_data_generation_experiment": [13, 20], "keras_default_tpc": 22, "keras_file_path": 41, "keras_gradient_post_training_quant": [13, 15, 21], "keras_load_quantized_model": 23, "keras_post_training_quant": [13, 24, 41, 43, 49], "keras_pruning_experiment": [13, 25], "keras_quantization_aware_training_finalize_experiment": [13, 26], "keras_quantization_aware_training_init_experiment": [13, 26, 27], "keras_resource_utilization_data": [13, 22], "kernel": [3, 21, 24, 26, 27, 43, 46], "kernel_channels_map": 3, "kernel_op": 3, "kernel_ops_attributes_map": 3, "keyword": 45, "kl": [9, 49], "know": [3, 13], "knowledg": [4, 50], "known_dict": 2, "kwarg": 43, "l": [25, 50], "l2": 1, "l2_squar": [1, 14, 16], "l_p_valu": 8, "label": [6, 25, 32, 45, 50], "lambda": 41, "larg": [12, 48], "larger": 5, "last": [3, 4, 5, 48], "last_lay": 5, "last_layer_typ": [1, 16], "latenc": 41, "latest": 50, "launch": 49, "layaer": [13, 38], "layer": [1, 3, 5, 7, 11, 12, 
14, 15, 16, 17, 20, 21, 24, 25, 26, 27, 29, 31, 32, 33, 34, 40, 41, 43, 45, 46, 48, 49], "layer_min_max_map": 3, "layer_weighting_typ": [1, 14, 16], "layerfilterparam": 42, "learn": [1, 14, 15, 16, 17, 46], "learnabl": 46, "least": 6, "left": 11, "let": 41, "level": 35, "lfh": [6, 25, 32], "librari": [3, 8], "like": [8, 45], "limit": [6, 21, 24, 26, 27, 29, 31, 34], "line": 48, "linear": [4, 11, 28], "linear_collaps": [8, 11], "linearli": 4, "link": 48, "list": [0, 1, 3, 5, 11, 14, 15, 16, 20, 28, 40, 41, 43, 50], "liter": 45, "ll": [20, 28], "load": [13, 26, 27, 41, 46], "load_model": [26, 27], "loadopt": 23, "log": [4, 12, 13, 15, 17, 35, 48, 49], "log_funct": [4, 15, 17], "log_norm": 4, "log_tensorboard_xqu": 48, "logdir": 49, "logger": [13, 40, 49], "longer": 41, "look": [24, 27, 34, 45, 50], "lookup": 45, "loss": [1, 4, 12, 14, 15, 16, 17, 21, 25, 29, 31, 32, 48], "low": 11, "lp": 9, "lsq": 46, "lut_pot_quant": 45, "lut_sym_quant": 45, "lut_values_bitwidth": 45, "mae": [9, 49], "mai": [20, 21, 24, 27, 28, 29, 31, 34, 42, 49], "main": [11, 45, 48, 49], "make": [9, 40], "manag": [0, 11], "mandatori": 41, "mani": 49, "manipul": [0, 1], "manner": 45, "manual": [0, 13, 39, 48], "manual_activation_bit_width_selection_list": 0, "manual_weights_bit_width_selection_list": 0, "manualweightsbitwidthselect": 0, "map": [3, 45], "mask": 7, "match": [18, 19, 42, 43], "mathemat": 49, "max": [1, 3, 5, 8, 9, 21, 22, 24, 27, 29, 30, 31, 34, 49], "maxbit": 5, "maxim": [21, 24, 27, 34], "mct": [3, 8, 11, 13, 15, 17, 18, 19, 20, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 39, 40, 41, 43, 45, 46, 47, 48, 49, 50], "mct_current_schema": 45, "mct_quantiz": 41, "mct_wrapper": 11, "mctwrapper": 11, "mean": [1, 4, 9, 49], "measur": [6, 10, 12, 48, 49], "meet": [25, 32], "memori": [10, 25, 32, 49], "messag": 48, "metadata": [7, 45], "method": [4, 5, 6, 9, 11, 13, 25, 32, 35, 41, 43, 44, 45, 46], "metric": [4, 5, 6, 12, 36, 37, 38, 48], "metric_epsilon": 5, "metric_norm": 5, 
"metric_normalization_threshold": 5, "min": [1, 3, 5, 8, 9, 21, 24, 27, 29, 31, 34, 49], "min_threshold": [8, 46], "minbit": 5, "minim": [5, 9, 21, 25, 29, 31, 32], "minimum": 46, "minor": 45, "minut": 50, "mix": [5, 10, 11, 12, 13, 21, 22, 24, 26, 27, 29, 30, 31, 34, 39, 45, 48, 50], "mixed_precis": 11, "mixed_precision_config": [21, 22, 24, 26, 27, 39], "mixedprecisionquantizationconfig": [11, 13, 21, 22, 24, 26, 27, 39], "mkstemp": 41, "mobilenet": [21, 22], "mobilenet_v2": [24, 26, 27, 29, 30, 31, 33, 34, 41], "mobilenetv2": [24, 26, 27, 41, 49], "model": [3, 4, 5, 7, 8, 10, 11, 12, 13, 18, 19, 20, 21, 24, 25, 28, 29, 31, 32, 36, 37, 38, 39, 40, 43, 44, 45, 46, 48, 49], "model_compression_toolkit": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 48, 49], "model_fil": [26, 27], "model_format_onnx_mctq": 41, "model_mp": 5, "model_output": 41, "modifi": [13, 43], "modul": [13, 28, 29, 30, 31, 32, 37, 38], "more": [9, 18, 19, 24, 25, 27, 32, 34, 41, 45, 47, 48, 49], "most": 48, "mse": [8, 9, 11, 12, 48, 49], "much": 40, "multipl": [3, 5, 35, 45], "multiple_tensors_mse_loss": 4, "multipli": [1, 12, 14, 16, 48], "must": [25, 32, 45], "n_epoch": [4, 11, 15, 17, 21], "n_imag": [20, 28], "n_iter": [1, 14, 16, 20, 28], "nadam": 15, "name": [12, 40, 43, 45, 48, 49], "nchw": 3, "ndarrai": 7, "necessari": [4, 11, 41, 46, 48], "need": [3, 11, 13, 21, 24, 27, 29, 31, 34, 41, 42, 46, 48], "neg": [1, 5, 48], "negative_min_max_diff": [1, 16], "network": [3, 6, 11, 33, 39, 40, 43, 49, 50], "network_editor": [13, 40], "netzer": 50, "neural": [6, 11, 50], "neuron": 7, "new": [43, 45], "next": [20, 28, 41, 42], "nhwc": 3, "nn": [28, 37, 38], "no_norm": 1, "no_quantization_op": 3, "noclip": [8, 9], "node": [0, 27, 34, 41, 43, 46, 49], "node_nam": 43, "node_name_scop": 43, "node_typ": 43, "nodenamefilt": 43, "nodenamescopefilt": 43, "nodetypefilt": 43, 
"nois": 9, "non": [5, 15, 17, 45], "none": [1, 2, 4, 5, 8, 11, 12, 15, 17, 21, 23, 24, 27, 29, 31, 34, 35, 39, 40, 41, 43, 44, 45, 46], "norm": [9, 49], "norm_scor": [4, 5], "normal": [1, 4, 5, 14, 16], "note": [21, 24, 26, 27], "notebook": 50, "noteq": 42, "notic": [20, 25, 28, 32, 41], "now": [6, 18, 19, 34, 41, 45, 46, 47, 49], "np": [7, 11, 21, 22, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34, 41], "num_calibration_batch": [21, 24, 27, 29, 31, 34], "num_interest_points_factor": 5, "num_of_imag": [5, 11, 21, 24], "num_score_approxim": [6, 25, 32], "number": [1, 4, 5, 6, 11, 12, 14, 15, 16, 17, 20, 21, 24, 25, 27, 28, 29, 31, 32, 34, 40, 45, 46, 48], "numel": 32, "numer": 5, "numpi": [21, 22, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34, 41], "o": 50, "object": [0, 3, 4, 5, 6, 10, 12, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24, 26, 27, 29, 30, 31, 34, 41, 43, 45, 46, 48], "observ": [21, 29, 31, 45, 49], "one": [5, 42, 49], "onli": [3, 4, 5, 6, 12, 21, 24, 26, 27, 41, 45], "onlin": [27, 34], "onnx": 11, "onnx_file_path": 41, "onnx_opset_vers": 41, "onnxruntim": 41, "op": [42, 45], "open": [41, 49, 50], "oper": [3, 10, 40, 42, 45], "operator_group": 45, "operator_set": 45, "operators_set": 45, "operatorsetnam": 45, "opquantizationconfig": [18, 19, 47], "optim": [1, 3, 4, 10, 11, 13, 14, 15, 16, 17, 18, 19, 21, 22, 24, 27, 29, 30, 31, 34, 39, 45, 46, 47, 50], "optimizer_bia": 4, "optimizer_quantization_paramet": 4, "optimizer_rest": [4, 15, 17], "optimizerv2": 15, "option": [11, 13, 21, 23, 24, 25, 27, 29, 31, 32, 34, 41, 45], "order": [15, 17, 21, 24, 27, 34, 40, 41, 42, 44], "org": 46, "orient": [13, 46], "origin": [25, 35, 36, 37, 38, 49], "ort": 41, "other": [1, 11, 15, 17, 48], "otherwis": 45, "our": [21, 24, 26, 27, 34, 50], "out": [3, 6], "out1": 50, "out2": 50, "out3": 50, "out_channel_axis_map": 3, "outlier": [12, 48], "output": [1, 3, 12, 14, 16, 20, 21, 24, 27, 28, 29, 31, 33, 34, 40, 45, 48, 49, 50], "output_image_s": [20, 28], "output_loss_multipli": [1, 14, 16], 
"output_loss_typ": [1, 14, 16], "output_nam": 41, "outputlosstyp": [14, 16], "over": 5, "overrid": [4, 44], "overwrit": 5, "p": 32, "packag": [41, 46, 50], "pad": 45, "page": 13, "pair": 49, "param": [17, 40, 43, 46], "param_item": 11, "paramet": [1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46], "pars": 45, "part": 41, "pass": [2, 3, 5, 15, 17, 21, 24, 25, 26, 27, 29, 31, 32, 33, 34, 43], "patch": 45, "path": [11, 13, 23, 35, 41, 48, 49], "pattern": 45, "pdf": 46, "per": [1, 3, 4, 21, 24, 27, 34, 45, 46, 49], "per_sampl": 4, "percentag": [5, 40], "peretz": 50, "perform": [6, 10, 11, 20, 25, 28, 32], "phase": 49, "pinpoint": 40, "pip": [41, 50], "pipelin": [1, 11, 14, 16], "pixel": [1, 14, 16], "place": 45, "plan": 41, "platform": [11, 18, 19, 21, 24, 25, 26, 27, 30, 32, 45], "pleas": [24, 27, 34, 41, 44, 48, 50], "plot": [40, 49], "point": [4, 5, 15, 17, 21, 29, 31, 36, 37, 38, 45, 49], "posit": 45, "possibl": [9, 21, 24, 27, 34, 45, 49], "post": [4, 11, 13, 25, 27, 32, 34, 50], "power": [21, 24, 27, 29, 31, 34, 45], "power_of_two": 45, "poweroftwo": 46, "pre": 5, "preced": [21, 24, 27, 29, 31, 34], "precis": [5, 10, 11, 12, 13, 21, 22, 24, 25, 26, 27, 29, 30, 31, 32, 34, 39, 45, 48, 50], "predefin": [5, 6], "predict": 41, "prepar": [11, 13, 27, 34], "preprint": 50, "present": [2, 48, 49], "preserv": 45, "pretrain": [33, 34], "prevent": 5, "print": 40, "prior": 5, "prioriti": 11, "problemat": 40, "procedur": 48, "process": [4, 5, 8, 13, 14, 15, 16, 17, 18, 19, 20, 25, 28, 32, 39, 43, 44, 45, 47, 49], "product": 49, "progress": 40, "progress_info_callback": 40, "progress_perc": 40, "progressinfocallback": 40, "project": [41, 50], "properti": 7, "propos": [46, 48], "provid": [2, 11, 20, 25, 28, 32, 40, 41, 45, 46, 48, 49], "prune": [10, 50], "pruned_model": [25, 32], "pruning_config": [25, 32], "pruning_info": [25, 32], "pruning_mask": 7, 
"pruning_num_score_approxim": 6, "pruningconfig": [6, 13, 25, 32], "pruninginfo": [7, 13, 25, 32], "ptq": [11, 24, 31, 41, 48], "purpos": [20, 28, 40], "py": 50, "pydantic_cor": 45, "pypi": 50, "python": [35, 50], "pytorch": [11, 13, 45, 46, 50], "pytorch_data_generation_experiment": [13, 28], "pytorch_default_tpc": 30, "pytorch_gradient_post_training_quant": [13, 17, 29], "pytorch_post_training_quant": [13, 31, 41, 48], "pytorch_pruning_experiment": [13, 32], "pytorch_quantization_aware_training_finalize_experiment": [13, 33], "pytorch_quantization_aware_training_init_experiment": [13, 33, 34], "pytorch_resource_utilization_data": [13, 30], "q": 41, "q_fraction_scheduler_polici": 4, "qat": [26, 27, 33, 34, 44], "qat_config": [13, 27, 34], "qatconfig": [27, 34], "qc": 8, "qc_option": 45, "qmodel": 11, "qnnpack": 45, "quant": 41, "quantifi": [7, 49], "quantiz": [0, 3, 4, 5, 8, 9, 11, 12, 13, 15, 17, 20, 22, 28, 30, 36, 37, 38, 39, 40, 43, 44, 45, 46, 49, 50], "quantization_config": [39, 46], "quantization_configur": 45, "quantization_format": 41, "quantization_info": [21, 24, 26, 27, 29, 31, 33, 34], "quantization_preserv": [18, 19, 45, 47], "quantizationconfig": [13, 39], "quantizationerrormethod": [8, 11, 13], "quantizationmethod": [3, 46], "quantize_and_export": 11, "quantize_reported_dir": [12, 48], "quantized_exportable_model": 41, "quantized_info": 48, "quantized_model": [11, 21, 24, 26, 27, 33, 34, 36, 37, 38, 48], "quantized_modul": [29, 31], "quantizewrapp": [13, 27, 33, 34], "question": 41, "r": 50, "radam": 16, "rais": 45, "random": [21, 22, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34, 41], "random_data_gen": 48, "rang": [3, 12, 21, 24, 27, 29, 31, 34, 48], "rate": [1, 14, 15, 16, 17], "ratio": [11, 12, 48], "readi": 33, "readm": 41, "receiv": [11, 40], "recent": 48, "recommend": 48, "recov": [25, 32], "red": 48, "reduc": [5, 25, 32], "reduce_on_plateau": [1, 14], "reduce_on_plateau_with_reset": 16, "reduceonplateau": 1, "refer": [41, 48], "refine_mp_solut": 
5, "regard": 42, "regular": [1, 4, 15, 17], "regularization_factor": [4, 15, 17], "regularized_min_max_diff": [1, 14], "relat": [3, 7, 13, 45], "releas": 50, "relev": 41, "relu": 3, "relu_bound_to_power_of_2": 8, "remain": 40, "remov": [12, 25, 32, 33, 48], "replac": [26, 48], "report": [12, 13, 48], "report_dir": [12, 48], "repositori": 41, "repr_datagen": [21, 22, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34], "repr_dataset": [36, 37, 38, 41], "repres": [4, 5, 10, 11, 15, 17, 21, 24, 25, 26, 27, 29, 31, 32, 33, 34, 36, 37, 38, 40, 41, 43, 45, 48, 49], "representative_data_gen": [21, 22, 24, 25, 27, 29, 30, 31, 32, 34, 41, 48], "representative_dataset": 11, "request": 2, "requir": [21, 24, 27, 29, 31, 34, 46, 49], "research": 50, "reshap": [3, 20], "residu": 11, "residual_collaps": [8, 11], "resnet50": [25, 32, 41], "resnet50_weight": 32, "resourc": [6, 10, 11, 13, 21, 24, 25, 26, 27, 32, 33, 34, 49], "resourceutil": [13, 21, 22, 24, 25, 26, 27, 29, 30, 31, 32, 34], "respect": 48, "respectivli": 3, "rest": 4, "result": 48, "retrain": [25, 32], "retriev": [18, 19, 40, 45], "return": [2, 4, 5, 7, 11, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41], "round": 4, "rounding_typ": 4, "ru": [21, 24, 26, 27], "ru_data": [22, 30], "rule": [40, 43], "run": [4, 15, 17, 40, 41, 49], "runner": 40, "same": [1, 41, 45], "sampl": [4, 15, 17, 49], "save": [3, 11, 12, 27, 35, 41, 46, 48], "save_model_path": [11, 41], "saved_model": 23, "savedmodel": 23, "scalar": 49, "scale": [4, 5, 45], "scale_log_norm": 4, "schedul": [1, 4, 14, 16, 40], "scheduler_typ": [1, 14, 16], "schedulertyp": [14, 16], "schema": 45, "schema_vers": 45, "score": [4, 5, 6, 7, 9, 11, 15, 17, 25, 32], "sdsp": [11, 13, 45], "sdsp_v3_14": 19, "sdsp_version": [11, 19], "search": [5, 10, 13, 21, 24, 27, 29, 31, 34], "second": 49, "section": 40, "see": [4, 17, 48, 50], "seen": 49, "select": [0, 3, 6, 8, 9, 11, 13, 39, 41, 44, 45, 46], "self": [40, 45], 
"semiconductor": 50, "sensit": [5, 6, 25, 32], "sequenti": [20, 28], "serial": 13, "serialization_format": 41, "sess": 41, "session": 41, "set": [3, 11, 12, 13, 15, 17, 20, 21, 24, 25, 26, 27, 28, 29, 31, 32, 34, 35, 36, 37, 38, 40, 41, 43, 45, 46, 48, 49], "set_log_fold": [35, 48, 49], "setup": [11, 50], "sever": [21, 24, 27, 29, 31, 34, 49], "shift": 48, "shift_negative_activation_correct": 8, "shift_negative_params_search": 8, "shift_negative_ratio": 8, "shift_negative_threshold_recalcul": 8, "shortli": 45, "should": [3, 6, 15, 21, 22, 24, 25, 26, 27, 29, 31, 32, 34, 41, 45, 49], "show": 49, "shown": 48, "sigma": 5, "signal": 9, "signed": 45, "signific": [7, 48], "significantli": 48, "simd": [25, 32, 45], "simd_siz": 45, "similar": [9, 12, 36, 37, 38, 40, 48, 50], "similarli": 45, "simpl": [20, 28], "simplic": [20, 28], "simul": 40, "simulate_schedul": 40, "simultan": 45, "singl": 45, "six": 48, "size": [1, 4, 5, 14, 15, 16, 17, 20, 21, 24, 26, 27, 28, 34, 41, 46], "skip": [12, 40, 41, 48], "slowli": 41, "small": 48, "smaller": 42, "smallereq": 42, "smooth": [1, 46], "smoothing_and_augment": [1, 14, 16], "so": [11, 41], "softmax": 3, "softmax_shift": 8, "softquant": 4, "solut": 50, "solver": [21, 24, 27, 34], "some": [18, 19, 20, 28, 41, 45, 47, 49], "soni": 50, "sonysemiconductorsolut": 50, "sourc": 50, "specif": [0, 3, 11, 13, 25, 32, 43, 48, 49], "specifi": [6, 11, 12, 14, 16, 18, 20, 23, 25, 28, 32, 41, 45, 48], "sphinx": 13, "sqnr": [12, 48], "squar": [1, 9], "stabl": 50, "stage": 49, "standard": [25, 32, 40, 46], "start": [20, 28, 41, 46, 50], "start_step": 4, "state": 50, "state_dict": 32, "statist": [3, 21, 24, 27, 29, 31, 34, 49], "stderr": 40, "ste": [4, 44, 46], "step": [1, 4, 40, 46, 48], "store": [7, 46], "str": [3, 11, 12, 18, 19, 21, 22, 24, 25, 27, 29, 30, 31, 32, 34, 35, 36, 37, 38, 40, 41, 42, 45, 48], "straight": [4, 46], "strategi": [6, 25, 32], "string": 43, "structur": [13, 50], "student": 4, "success": 11, "suffer": 41, "suggest": 48, 
"sum": [10, 22, 25, 30, 32], "support": [4, 11, 41], "supported_input_activation_n_bit": 45, "sure": 40, "sy": 40, "symmetr": [21, 24, 27, 29, 31, 34, 45, 46], "t": [35, 50], "tab": 49, "tabl": 45, "tag": 49, "take": [5, 24, 27, 34, 50], "target": [4, 11, 13, 18, 19, 21, 22, 24, 25, 26, 27, 30, 32, 33, 34, 45], "target_platform_cap": [21, 22, 24, 25, 27, 29, 30, 31, 32, 34, 42, 46], "target_q_fract": 4, "target_resource_util": [21, 24, 25, 27, 29, 31, 32, 34], "targetplatformcap": [13, 21, 22, 24, 25, 27, 29, 30, 31, 32, 34], "teacher": 4, "tempfil": 41, "tensor": [5, 11, 12, 15, 17, 20, 22, 28, 30, 45, 46, 49, 50], "tensorboard": [40, 50], "tensorflow": [3, 11, 13, 15, 20, 21, 22, 24, 25, 26, 27, 41, 43, 45, 50], "tf": [3, 11, 15, 20, 23, 26, 27], "tflite": [41, 45], "than": [5, 42, 48], "thei": 3, "them": [45, 49], "thi": [5, 7, 8, 9, 11, 13, 20, 21, 23, 24, 25, 26, 27, 28, 29, 31, 32, 34, 35, 40, 41, 45, 46, 48, 50], "those": 48, "three": [3, 48], "threshold": [5, 8, 9, 11, 12, 21, 24, 27, 29, 31, 34, 45, 46, 48], "threshold_bitwidth_mixed_precis": 48, "threshold_bitwidth_mixed_precision_with_model_output_loss_object": 12, "threshold_degrade_layer_ratio": [12, 48], "threshold_quantize_error": [12, 48], "threshold_ratio_unbalanced_concaten": [12, 48], "threshold_zscore_outlier_remov": [12, 48], "through": [4, 20, 25, 28, 46], "throughout": 4, "thu": [25, 32, 49], "time": [3, 6, 46], "togeth": [25, 32], "tool": [11, 13, 46, 50], "toolkit": [11, 13, 20, 28, 29, 48], "torch": [17, 28, 37, 38, 41, 50], "torchscript": 41, "torchvis": [1, 16, 29, 30, 31, 32, 33, 34, 41], "total": [10, 22, 30, 40], "total_memori": 10, "totalcompon": 40, "tpc": [11, 13, 25, 32, 45], "tpc_minor_vers": 45, "tpc_patch_vers": 45, "tpc_platform_typ": 45, "tpc_v1_0": 18, "tpc_version": 18, "trace": 41, "track": 40, "train": [4, 11, 13, 44, 46, 50], "train_bia": 4, "trainabl": [23, 26, 46], "trainable_infrastructur": 44, "trainablequant": 26, "transform": [1, 21, 24, 27, 29, 31, 34], 
"transpos": 3, "treat": 45, "troubleshoot": 13, "true": [1, 5, 8, 11, 12, 15, 16, 17, 23, 33, 34, 40, 46], "try": 5, "tun": 34, "tune": [15, 17, 25, 26, 27, 32, 33], "tupl": [1, 3, 11, 14, 16, 20, 21, 24, 25, 28, 29, 31, 32, 43, 45], "tutori": 48, "two": [5, 12, 21, 24, 27, 29, 31, 34, 41, 45, 48, 49], "type": [0, 1, 2, 4, 5, 6, 7, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 24, 25, 26, 28, 29, 30, 31, 32, 35, 36, 37, 38, 40, 41, 43, 45, 48], "ui": 49, "unbalanc": [12, 48], "unchang": 40, "under": 49, "unifi": 11, "uniform": [45, 46], "union": [1, 14, 16, 20, 21, 22, 24, 25, 27, 28, 29, 30, 31, 32, 34, 45], "uniqu": 45, "up": [6, 20, 28, 35, 45, 49], "updat": [4, 11], "upon": 46, "us": [0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 42, 43, 44, 45, 46, 47, 48, 49, 50], "use_hessian_based_scor": [5, 11], "use_hessian_based_weight": [15, 17], "use_hessian_sample_attent": [15, 17], "use_mixed_precis": 11, "user": [11, 13, 21, 24, 26, 27, 29, 31, 33, 34, 40, 48], "userinform": [21, 24, 29, 31], "util": [6, 11, 13, 21, 24, 25, 26, 27, 32, 33, 34, 46], "v": 50, "valid": [36, 37, 38, 45, 46, 48], "validation_dataset": [36, 37, 38, 48], "validationerror": 45, "valu": [1, 2, 3, 4, 5, 6, 9, 11, 12, 21, 24, 25, 26, 27, 32, 40, 41, 42, 43, 45, 46, 48], "valuabl": 9, "variabl": [11, 15, 17], "variou": [11, 20, 28, 49], "vector": [4, 49], "verbos": 35, "version": [11, 13, 20, 28, 45], "via": [41, 50], "view": 49, "visit": [44, 50], "visual": [40, 48, 50], "wa": [2, 41, 48], "wai": [49, 50], "walk": [20, 28], "want": 3, "warn": [11, 48], "we": [3, 20, 21, 24, 25, 27, 28, 32, 34, 41, 43, 45, 46, 49], "weight": [0, 1, 3, 4, 5, 8, 10, 11, 14, 15, 16, 17, 21, 22, 25, 27, 29, 30, 31, 32, 33, 34, 41, 43, 44, 45, 46, 49], "weight_quantizer_params_overrid": 44, "weight_training_method": 44, "weights_bias_correct": [8, 11], "weights_channels_axi": 46, "weights_compression_ratio": 11, 
"weights_error_method": 8, "weights_memori": [6, 10, 21, 24, 25, 27, 32, 34], "weights_n_bit": [43, 45, 46], "weights_per_channel_threshold": [45, 46], "weights_quantization_candid": 46, "weights_quantization_method": [43, 45, 46], "weights_quantization_param": 46, "weights_quantization_params_fn": 43, "weights_second_moment_correct": 8, "were": 49, "when": [1, 2, 3, 4, 5, 6, 9, 10, 12, 13, 15, 17, 21, 24, 26, 27, 40, 41, 42, 44, 45, 46, 48, 49], "where": [7, 12, 41, 43, 48, 49], "whether": [4, 5, 7, 11, 14, 15, 16, 17, 23, 40, 41, 45, 46], "which": [4, 6, 40, 41, 42, 43, 45, 46], "while": [8, 21, 24, 26, 27, 34, 40, 45], "who": 48, "width": [0, 5, 12, 13, 21, 24, 27, 28, 34, 39, 45, 48, 50], "within": [40, 45, 48, 50], "without": 13, "work": 50, "would": 49, "wrap": [2, 3, 23, 27, 34, 42, 45, 46], "wrapper": [27, 33, 34, 46], "writer": 49, "x": 48, "xquant": [11, 50], "xquant_config": [12, 36, 37, 38, 48], "xquant_report_keras_experiment": [13, 36], "xquant_report_pytorch_experiment": [13, 37, 48], "xquant_report_troubleshoot_pytorch_experiment": [12, 13, 38, 48], "xquantconfig": [12, 13, 36, 37, 38], "y": 48, "yield": [21, 22, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34, 41], "you": [8, 11, 40, 41, 45, 49, 50], "your": [41, 48], "z": 11, "z_score": [12, 48], "z_threshold": [8, 11], "zero": [5, 45]}, "titles": ["BitWidthConfig", "Data Generation Configuration", "DefaultDict Class", "FrameworkInfo Class", "GradientPTQConfig Class", "MixedPrecisionQuantizationConfig", "Pruning Configuration", "Pruning Information", "QuantizationConfig", "QuantizationErrorMethod", "ResourceUtilization", "wrapper", "XQuant Configuration", "API Docs", "Get DataGenerationConfig for Keras Models", "Get GradientPTQConfig for Keras Models", "Get DataGenerationConfig for Pytorch Models", "Get GradientPTQConfig for Pytorch Models", "Get TargetPlatformCapabilities for tpc version", "Get TargetPlatformCapabilities for sdsp converter version", "Keras Data Generation", "Keras Gradient Based Post 
Training Quantization", "Get Resource Utilization information for Keras Models", "Load Quantized Keras Model", "Keras Post Training Quantization", "Keras Structured Pruning", "Keras Quantization Aware Training Model Finalize", "Keras Quantization Aware Training Model Init", "Pytorch Data Generation", "Pytorch Gradient Based Post Training Quantization", "Get Resource Utilization information for PyTorch Models", "Pytorch Post Training Quantization", "Pytorch Structured Pruning", "PyTorch Quantization Aware Training Model Finalize", "PyTorch Quantization Aware Training Model Init", "Enable a Logger", "XQuant Report Keras", "XQuant Report Pytorch", "XQuant Report Troubleshoot Pytorch", "CoreConfig", "debug_config Module", "exporter Module", "Layer Attributes Filters", "network_editor Module", "qat_config Module", "target_platform_capabilities Module", "trainable_infrastructure Module", "<no title>", "XQuant Extension Tool", "Visualization within TensorBoard", "Model Compression Toolkit User Guide"], "titleterms": {"about": 48, "action": 43, "api": [13, 50], "attribut": 42, "attributequantizationconfig": 45, "awar": [26, 27, 33, 34], "base": [21, 29], "basekerastrainablequant": 46, "basepytorchtrainablequant": 46, "batchnormalignemntlosstyp": 1, "bit": 49, "bitwidthconfig": 0, "bnlayerweightingtyp": 1, "channelaxi": 3, "channelsfilteringstrategi": 6, "class": [2, 3, 4], "comparison": 49, "compress": 50, "configur": [1, 6, 12, 49], "constraint": 50, "convert": 19, "core": 13, "coreconfig": 39, "cosin": 49, "data": [1, 20, 28], "data_gener": 13, "datagenerationconfig": [14, 16], "datainittyp": 1, "debug_config": 40, "debugconfig": 40, "defaultdict": 2, "dictionari": 40, "doc": 13, "document": 50, "editrul": 43, "enabl": 35, "error": 48, "exampl": 48, "export": [13, 41], "extens": 48, "featur": 50, "filter": [42, 43], "final": [26, 33], "flow": 48, "format": [41, 48], "frameworkinfo": 3, "fuse": 45, "gener": [1, 20, 28, 48], "get": [14, 15, 16, 17, 18, 19, 22, 30], "gptq": 
13, "gptqhessianscoresconfig": 4, "gradient": [21, 29], "gradientptqconfig": [4, 15, 17], "gradualactivationquantizationconfig": 4, "graph": 48, "guid": 50, "how": 48, "imagegranular": 1, "imagenormalizationtyp": 1, "imagepipelinetyp": 1, "importancemetr": 6, "indic": 13, "infer": 41, "inform": [7, 22, 30], "init": [27, 34], "instal": 50, "judgeabl": 48, "kei": 40, "kera": [14, 15, 20, 21, 22, 23, 24, 25, 26, 27, 36, 41], "keras_export_model": 41, "keras_load_quantized_model": 13, "kerasexportserializationformat": 41, "layer": 42, "load": 23, "logger": 35, "manualbitwidthselect": 0, "mctq": 41, "mix": 49, "mixedprecisionquantizationconfig": 5, "model": [14, 15, 16, 17, 22, 23, 26, 27, 30, 33, 34, 41, 50], "modul": [40, 41, 43, 44, 45, 46], "mpdistanceweight": 5, "mpmetricnorm": 5, "name": 41, "network_editor": 43, "onnx": 41, "operatorsetgroup": 45, "operatorsset": 45, "opquantizationconfig": 45, "opset": 41, "output": 41, "outputlosstyp": 1, "overal": 48, "overview": 50, "paramet": 48, "post": [21, 24, 29, 31], "precis": 49, "process": [40, 48], "prune": [6, 7, 13, 25, 32], "ptq": 13, "pytorch": [16, 17, 28, 29, 30, 31, 32, 33, 34, 37, 38, 41], "pytorch_export_model": 41, "pytorchexportserializationformat": 41, "qat": 13, "qat_config": 44, "qatconfig": 44, "qfractionlinearannealingconfig": 4, "quantiz": [21, 23, 24, 26, 27, 29, 31, 33, 34, 41, 48], "quantizationconfig": 8, "quantizationconfigopt": 45, "quantizationerrormethod": 9, "quantizationformat": 41, "quantizationmethod": 45, "quickstart": 50, "refer": 50, "report": [36, 37, 38], "resourc": [22, 30], "resourceutil": 10, "roundingtyp": 4, "run": 48, "schedulertyp": 1, "sdsp": 19, "serial": 41, "set_log_fold": 13, "similar": 49, "state": 40, "structur": [25, 32], "support": 50, "tabl": 13, "target_platform_cap": [13, 45], "targetplatformcap": [18, 19, 45], "technic": 50, "tensorboard": 49, "tool": 48, "toolkit": 50, "tpc": 18, "train": [21, 24, 26, 27, 29, 31, 33, 34], "trainable_infrastructur": [13, 46], 
"trainablequantizeractivationconfig": 46, "trainablequantizerweightsconfig": 46, "trainingmethod": [44, 46], "troubleshoot": [38, 48], "tutori": 41, "understand": 48, "us": 41, "user": 50, "util": [22, 30], "version": [18, 19, 41], "visual": 49, "width": 49, "within": 49, "wrapper": [11, 13], "xquant": [12, 13, 36, 37, 38, 48], "xquantconfig": 48}}) \ No newline at end of file diff --git a/docs/static/pygments.css b/docs/static/pygments.css index 5f2b0a250..0d49244ed 100644 --- a/docs/static/pygments.css +++ b/docs/static/pygments.css @@ -6,26 +6,26 @@ span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: .highlight .hll { background-color: #ffffcc } .highlight { background: #eeffcc; } .highlight .c { color: #408090; font-style: italic } /* Comment */ -.highlight .err { border: 1px solid #F00 } /* Error */ +.highlight .err { border: 1px solid #FF0000 } /* Error */ .highlight .k { color: #007020; font-weight: bold } /* Keyword */ -.highlight .o { color: #666 } /* Operator */ +.highlight .o { color: #666666 } /* Operator */ .highlight .ch { color: #408090; font-style: italic } /* Comment.Hashbang */ .highlight .cm { color: #408090; font-style: italic } /* Comment.Multiline */ .highlight .cp { color: #007020 } /* Comment.Preproc */ .highlight .cpf { color: #408090; font-style: italic } /* Comment.PreprocFile */ .highlight .c1 { color: #408090; font-style: italic } /* Comment.Single */ -.highlight .cs { color: #408090; background-color: #FFF0F0 } /* Comment.Special */ +.highlight .cs { color: #408090; background-color: #fff0f0 } /* Comment.Special */ .highlight .gd { color: #A00000 } /* Generic.Deleted */ .highlight .ge { font-style: italic } /* Generic.Emph */ .highlight .ges { font-weight: bold; font-style: italic } /* Generic.EmphStrong */ -.highlight .gr { color: #F00 } /* Generic.Error */ +.highlight .gr { color: #FF0000 } /* Generic.Error */ .highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */ .highlight .gi { 
color: #00A000 } /* Generic.Inserted */ -.highlight .go { color: #333 } /* Generic.Output */ -.highlight .gp { color: #C65D09; font-weight: bold } /* Generic.Prompt */ +.highlight .go { color: #333333 } /* Generic.Output */ +.highlight .gp { color: #c65d09; font-weight: bold } /* Generic.Prompt */ .highlight .gs { font-weight: bold } /* Generic.Strong */ .highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ -.highlight .gt { color: #04D } /* Generic.Traceback */ +.highlight .gt { color: #0044DD } /* Generic.Traceback */ .highlight .kc { color: #007020; font-weight: bold } /* Keyword.Constant */ .highlight .kd { color: #007020; font-weight: bold } /* Keyword.Declaration */ .highlight .kn { color: #007020; font-weight: bold } /* Keyword.Namespace */ @@ -33,43 +33,43 @@ span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: .highlight .kr { color: #007020; font-weight: bold } /* Keyword.Reserved */ .highlight .kt { color: #902000 } /* Keyword.Type */ .highlight .m { color: #208050 } /* Literal.Number */ -.highlight .s { color: #4070A0 } /* Literal.String */ -.highlight .na { color: #4070A0 } /* Name.Attribute */ +.highlight .s { color: #4070a0 } /* Literal.String */ +.highlight .na { color: #4070a0 } /* Name.Attribute */ .highlight .nb { color: #007020 } /* Name.Builtin */ -.highlight .nc { color: #0E84B5; font-weight: bold } /* Name.Class */ -.highlight .no { color: #60ADD5 } /* Name.Constant */ -.highlight .nd { color: #555; font-weight: bold } /* Name.Decorator */ -.highlight .ni { color: #D55537; font-weight: bold } /* Name.Entity */ +.highlight .nc { color: #0e84b5; font-weight: bold } /* Name.Class */ +.highlight .no { color: #60add5 } /* Name.Constant */ +.highlight .nd { color: #555555; font-weight: bold } /* Name.Decorator */ +.highlight .ni { color: #d55537; font-weight: bold } /* Name.Entity */ .highlight .ne { color: #007020 } /* Name.Exception */ -.highlight .nf { color: #06287E } /* Name.Function */ 
+.highlight .nf { color: #06287e } /* Name.Function */ .highlight .nl { color: #002070; font-weight: bold } /* Name.Label */ -.highlight .nn { color: #0E84B5; font-weight: bold } /* Name.Namespace */ +.highlight .nn { color: #0e84b5; font-weight: bold } /* Name.Namespace */ .highlight .nt { color: #062873; font-weight: bold } /* Name.Tag */ -.highlight .nv { color: #BB60D5 } /* Name.Variable */ +.highlight .nv { color: #bb60d5 } /* Name.Variable */ .highlight .ow { color: #007020; font-weight: bold } /* Operator.Word */ -.highlight .w { color: #BBB } /* Text.Whitespace */ +.highlight .w { color: #bbbbbb } /* Text.Whitespace */ .highlight .mb { color: #208050 } /* Literal.Number.Bin */ .highlight .mf { color: #208050 } /* Literal.Number.Float */ .highlight .mh { color: #208050 } /* Literal.Number.Hex */ .highlight .mi { color: #208050 } /* Literal.Number.Integer */ .highlight .mo { color: #208050 } /* Literal.Number.Oct */ -.highlight .sa { color: #4070A0 } /* Literal.String.Affix */ -.highlight .sb { color: #4070A0 } /* Literal.String.Backtick */ -.highlight .sc { color: #4070A0 } /* Literal.String.Char */ -.highlight .dl { color: #4070A0 } /* Literal.String.Delimiter */ -.highlight .sd { color: #4070A0; font-style: italic } /* Literal.String.Doc */ -.highlight .s2 { color: #4070A0 } /* Literal.String.Double */ -.highlight .se { color: #4070A0; font-weight: bold } /* Literal.String.Escape */ -.highlight .sh { color: #4070A0 } /* Literal.String.Heredoc */ -.highlight .si { color: #70A0D0; font-style: italic } /* Literal.String.Interpol */ -.highlight .sx { color: #C65D09 } /* Literal.String.Other */ +.highlight .sa { color: #4070a0 } /* Literal.String.Affix */ +.highlight .sb { color: #4070a0 } /* Literal.String.Backtick */ +.highlight .sc { color: #4070a0 } /* Literal.String.Char */ +.highlight .dl { color: #4070a0 } /* Literal.String.Delimiter */ +.highlight .sd { color: #4070a0; font-style: italic } /* Literal.String.Doc */ +.highlight .s2 { color: #4070a0 } /* 
Literal.String.Double */ +.highlight .se { color: #4070a0; font-weight: bold } /* Literal.String.Escape */ +.highlight .sh { color: #4070a0 } /* Literal.String.Heredoc */ +.highlight .si { color: #70a0d0; font-style: italic } /* Literal.String.Interpol */ +.highlight .sx { color: #c65d09 } /* Literal.String.Other */ .highlight .sr { color: #235388 } /* Literal.String.Regex */ -.highlight .s1 { color: #4070A0 } /* Literal.String.Single */ +.highlight .s1 { color: #4070a0 } /* Literal.String.Single */ .highlight .ss { color: #517918 } /* Literal.String.Symbol */ .highlight .bp { color: #007020 } /* Name.Builtin.Pseudo */ -.highlight .fm { color: #06287E } /* Name.Function.Magic */ -.highlight .vc { color: #BB60D5 } /* Name.Variable.Class */ -.highlight .vg { color: #BB60D5 } /* Name.Variable.Global */ -.highlight .vi { color: #BB60D5 } /* Name.Variable.Instance */ -.highlight .vm { color: #BB60D5 } /* Name.Variable.Magic */ +.highlight .fm { color: #06287e } /* Name.Function.Magic */ +.highlight .vc { color: #bb60d5 } /* Name.Variable.Class */ +.highlight .vg { color: #bb60d5 } /* Name.Variable.Global */ +.highlight .vi { color: #bb60d5 } /* Name.Variable.Instance */ +.highlight .vm { color: #bb60d5 } /* Name.Variable.Magic */ .highlight .il { color: #208050 } /* Literal.Number.Integer.Long */ \ No newline at end of file diff --git a/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py b/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py index 308b0c90e..c7adc4881 100644 --- a/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py +++ b/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py @@ -28,6 +28,8 @@ from model_compression_toolkit.core.common.mixed_precision.sensitivity_eval.sensitivity_evaluation import SensitivityEvaluation from model_compression_toolkit.core.common.mixed_precision.solution_refinement_procedure import \ 
greedy_solution_refinement_procedure +from model_compression_toolkit.core.common.progress_config.progress_info_controller import \ + ProgressInfoController class BitWidthSearchMethod(Enum): @@ -41,7 +43,8 @@ def search_bit_width(graph: Graph, mp_config: MixedPrecisionQuantizationConfig, representative_data_gen: Callable, search_method: BitWidthSearchMethod = BitWidthSearchMethod.INTEGER_PROGRAMMING, - hessian_info_service: HessianInfoService = None) -> List[int]: + hessian_info_service: HessianInfoService = None, + progress_info_controller: ProgressInfoController = None) -> List[int]: """ Search for an MP configuration for a given graph. Given a search_method method (by default, it's linear programming), we use the sensitivity_evaluator object that provides a function to compute an @@ -59,6 +62,7 @@ def search_bit_width(graph: Graph, representative_data_gen: Dataset to use for retrieving images for the models inputs. search_method: BitWidthSearchMethod to define which searching method to use. hessian_info_service: HessianInfoService to fetch Hessian-approximation information. + progress_info_controller: ProgressInfoController to display and manage overall progress information. 
Returns: A MP configuration for the graph (list of integers, where the index in the list, is the node's @@ -81,7 +85,8 @@ def search_bit_width(graph: Graph, # even if a virtual graph was created (and is used only for BOPS utilization computation purposes) se = SensitivityEvaluation(graph, mp_config, representative_data_gen=representative_data_gen, fw_info=fw_info, fw_impl=fw_impl, disable_activation_for_metric=disable_activation_for_metric, - hessian_info_service=hessian_info_service) + hessian_info_service=hessian_info_service, + progress_info_controller=progress_info_controller) if search_method != BitWidthSearchMethod.INTEGER_PROGRAMMING: raise NotImplementedError() @@ -97,7 +102,8 @@ def search_bit_width(graph: Graph, fw_impl=fw_impl, sensitivity_evaluator=se, target_resource_utilization=target_resource_utilization, - mp_config=mp_config) + mp_config=mp_config, + progress_info_controller=progress_info_controller) nodes_bit_cfg = search_manager.search() graph.skip_validation_check = False diff --git a/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py b/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py index d0191946d..aa35e93a7 100644 --- a/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py +++ b/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py @@ -44,6 +44,8 @@ from model_compression_toolkit.logger import Logger from model_compression_toolkit.core.common.mixed_precision.mixed_precision_quantization_config import \ MixedPrecisionQuantizationConfig, MpMetricNormalization +from model_compression_toolkit.core.common.progress_config.progress_info_controller import \ + ProgressInfoController class MixedPrecisionSearchManager: @@ -57,7 +59,8 @@ def __init__(self, fw_impl: FrameworkImplementation, sensitivity_evaluator: SensitivityEvaluation, target_resource_utilization: ResourceUtilization, - mp_config: 
MixedPrecisionQuantizationConfig): + mp_config: MixedPrecisionQuantizationConfig, + progress_info_controller: ProgressInfoController = None): """ Args: @@ -67,11 +70,14 @@ def __init__(self, sensitivity_evaluator: A SensitivityEvaluation which provides a function that evaluates the sensitivity of a bit-width configuration for the MP model. target_resource_utilization: Target Resource Utilization to bound our feasible solution space s.t the configuration does not violate it. + progress_info_controller: ProgressInfoController to display and manage overall progress information. """ self.fw_info = fw_info self.fw_impl = fw_impl + self.progress_info_controller = progress_info_controller + self.original_graph = graph # graph for mp search self.mp_graph, self.using_virtual_graph = self._get_mp_graph(graph, target_resource_utilization) @@ -183,6 +189,9 @@ def ensure_maxbit_minimal_metric(node_candidates_metrics, max_ind): metrics[max_ind] = max_val return metrics + if self.progress_info_controller is not None: + self.progress_info_controller.set_description('Research Mixed Precision') + layer_to_metrics_mapping = {} debug_mapping = {} for node_idx, node in tqdm(enumerate(self.mp_topo_configurable_nodes)): diff --git a/model_compression_toolkit/core/common/mixed_precision/sensitivity_eval/metric_calculators.py b/model_compression_toolkit/core/common/mixed_precision/sensitivity_eval/metric_calculators.py index f3f36d913..e926148e3 100644 --- a/model_compression_toolkit/core/common/mixed_precision/sensitivity_eval/metric_calculators.py +++ b/model_compression_toolkit/core/common/mixed_precision/sensitivity_eval/metric_calculators.py @@ -22,6 +22,8 @@ from model_compression_toolkit.core.common.model_builder_mode import ModelBuilderMode from model_compression_toolkit.core.common.similarity_analyzer import compute_kl_divergence from model_compression_toolkit.logger import Logger +from model_compression_toolkit.core.common.progress_config.progress_info_controller import \ + 
ProgressInfoController @runtime_checkable @@ -64,7 +66,8 @@ def __init__(self, representative_data_gen: Callable, fw_info: FrameworkInfo, fw_impl: Any, - hessian_info_service: HessianInfoService = None): + hessian_info_service: HessianInfoService = None, + progress_info_controller: ProgressInfoController = None): """ Args: graph: Graph to search for its MP configuration. @@ -74,6 +77,7 @@ def __init__(self, fw_impl: FrameworkImplementation object with a specific framework methods implementation. representative_data_gen: Dataset used for getting batches for inference. hessian_info_service: HessianInfoService to fetch Hessian approximation information. + progress_info_controller: ProgressInfoController to display and manage overall progress information. """ self.graph = graph self.mp_config = mp_config @@ -121,7 +125,7 @@ def __init__(self, # Hessian-based scores for weighted average distance metric computation self.interest_points_hessians = None if self.mp_config.distance_weighting_method == MpDistanceWeighting.HESSIAN: - self.interest_points_hessians = self._compute_hessian_based_scores(hessian_info_service) + self.interest_points_hessians = self._compute_hessian_based_scores(hessian_info_service, progress_info_controller) def compute(self, mp_model) -> float: """ @@ -168,16 +172,20 @@ def _init_baseline_tensors_list(self): return [self.fw_impl.to_numpy(self.fw_impl.sensitivity_eval_inference(self.ref_model, images)) for images in self.images_batches] - def _compute_hessian_based_scores(self, hessian_info_service: HessianInfoService) -> np.ndarray: + def _compute_hessian_based_scores(self, hessian_info_service: HessianInfoService, progress_info_controller: ProgressInfoController) -> np.ndarray: """ Compute Hessian-based scores for each interest point. Args: hessian_info_service: Hessian service. + progress_info_controller: Progress infomation controller. 
Returns: A vector of scores, one for each interest point, to be used for the distance metric weighted average computation. """ + if progress_info_controller is not None: + progress_info_controller.set_description('Compute Hessian for Mixed Precision') + # Create a request for Hessian approximation scores with specific configurations # (here we use per-tensor approximation of the Hessian's trace w.r.t the node's activations) fw_dataloader = self.fw_impl.convert_data_gen_to_dataloader(self.representative_data_gen, diff --git a/model_compression_toolkit/core/common/mixed_precision/sensitivity_eval/sensitivity_evaluation.py b/model_compression_toolkit/core/common/mixed_precision/sensitivity_eval/sensitivity_evaluation.py index 399dc583b..55029e73a 100644 --- a/model_compression_toolkit/core/common/mixed_precision/sensitivity_eval/sensitivity_evaluation.py +++ b/model_compression_toolkit/core/common/mixed_precision/sensitivity_eval/sensitivity_evaluation.py @@ -27,6 +27,8 @@ from model_compression_toolkit.core.common.quantization.node_quantization_config import ActivationQuantizationMode from model_compression_toolkit.core.common.model_builder_mode import ModelBuilderMode from model_compression_toolkit.core.common.hessian import HessianInfoService +from model_compression_toolkit.core.common.progress_config.progress_info_controller import \ + ProgressInfoController class SensitivityEvaluation: @@ -41,7 +43,8 @@ def __init__(self, fw_info: FrameworkInfo, fw_impl: Any, disable_activation_for_metric: bool = False, - hessian_info_service: HessianInfoService = None + hessian_info_service: HessianInfoService = None, + progress_info_controller: ProgressInfoController = None ): """ Args: @@ -53,7 +56,7 @@ def __init__(self, fw_impl: FrameworkImplementation object with a specific framework methods implementation. disable_activation_for_metric: Whether to disable activation quantization when computing the MP metric. 
hessian_info_service: HessianInfoService to fetch Hessian approximation information. - + progress_info_controller: ProgressInfoController to display and manage overall progress information. """ self.mp_config = mp_config self.representative_data_gen = representative_data_gen @@ -65,7 +68,8 @@ def __init__(self, else: self.metric_calculator = DistanceMetricCalculator(graph, mp_config, representative_data_gen, fw_info=fw_info, fw_impl=fw_impl, - hessian_info_service=hessian_info_service) + hessian_info_service=hessian_info_service, + progress_info_controller=progress_info_controller) # Build a mixed-precision model which can be configured to use different bitwidth in different layers. # Also, returns a mapping between a configurable graph's node and its matching layer(s) in the built MP model. diff --git a/model_compression_toolkit/core/common/progress_config/__init__.py b/model_compression_toolkit/core/common/progress_config/__init__.py new file mode 100644 index 000000000..0f3b9c87e --- /dev/null +++ b/model_compression_toolkit/core/common/progress_config/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2026 Sony Semiconductor Solutions, Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== diff --git a/model_compression_toolkit/core/common/progress_config/constants.py b/model_compression_toolkit/core/common/progress_config/constants.py new file mode 100755 index 000000000..403707c51 --- /dev/null +++ b/model_compression_toolkit/core/common/progress_config/constants.py @@ -0,0 +1,24 @@ +# Copyright 2026 Sony Semiconductor Solutions, Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +COMPLETED_COMPONENTS = 'completedComponents' +TOTAL_COMPONENTS = 'totalComponents' +CURRENT_COMPONENT = 'currentComponent' + +PROGRESS_INFO_CALLBACK = 'progress_info_callback' +TOTAL_STEP = 'total_step' + +PROGRESS_BAR_POSITION = 2 +DEFAULT_TOTAL_STEP = 4 diff --git a/model_compression_toolkit/core/common/progress_config/progress_info_controller.py b/model_compression_toolkit/core/common/progress_config/progress_info_controller.py new file mode 100644 index 000000000..e279492cd --- /dev/null +++ b/model_compression_toolkit/core/common/progress_config/progress_info_controller.py @@ -0,0 +1,159 @@ + +# Copyright 2026 Sony Semiconductor Solutions, Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +from typing import Optional, Callable, TYPE_CHECKING +from dataclasses import dataclass, field +from tqdm import tqdm + +from model_compression_toolkit.core.common.progress_config.constants import ( + COMPLETED_COMPONENTS, TOTAL_COMPONENTS, CURRENT_COMPONENT, + PROGRESS_BAR_POSITION, PROGRESS_INFO_CALLBACK, TOTAL_STEP, DEFAULT_TOTAL_STEP +) + +if TYPE_CHECKING: # pragma: no cover + from model_compression_toolkit.core import CoreConfig + from model_compression_toolkit.gptq.common.gptq_config import GradientPTQConfig + from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import ResourceUtilization + + +@dataclass +class ProgressInfoController: + """ + A unified progress bar controller class. + Support single progress bar. + + Attributes: + total_step: Total number of processing steps. + description: Description for the progress bar. + current_step: Current step number (starts from 0, incremented by set_description()). + callback: User-defined callback function. + """ + total_step: int = field(default=0) + current_step: int = field(default=0) + description: str = field(default="Model Compression Toolkit Progress Infomation") + progress_info_callback: Optional[Callable] = field(default=None) + + def __new__(cls, *args, **kwargs): + """ + Create or skip instantiation based on the enable flag. + Returns None when progress display should be disabled. 
+ """ + progress_info_callback = kwargs.get(PROGRESS_INFO_CALLBACK) + total_step = kwargs.get(TOTAL_STEP) + + if progress_info_callback is None or total_step <= 0: + return None + + if not callable(progress_info_callback): + raise TypeError(f"{PROGRESS_INFO_CALLBACK} must be a callable (function or callable instance).") + + return super().__new__(cls) + + def __post_init__(self): + """Create progress bar after initialization.""" + # Initial single bar mode + self.pbar = tqdm( + total=self.total_step, + desc=self.description, + position=PROGRESS_BAR_POSITION, + leave=False, + unit='step', + dynamic_ncols=True, + bar_format='{l_bar}{bar:}|' + ) + + def set_description(self, description: str): + """ + Update progress bar description. + Automatically increments step number each time set_description is called, + displaying in "Step X/Y: ..." format. + + Args: + description: New description text ("Step X/Y: " is automatically added). + """ + self.description = description + self.current_step += 1 + formatted_description = f"Step {self.current_step}/{self.total_step}: {description}" + + try: + assert self.current_step <= self.total_step, \ + f"current_step: {self.current_step}, exceeded total_step: {self.total_step}." + except AssertionError: + self.close() + raise + + self.pbar.set_description(formatted_description, refresh=False) + self.pbar.update() + + progress_info = { + COMPLETED_COMPONENTS: description, + TOTAL_COMPONENTS: self.total_step, + CURRENT_COMPONENT: self.current_step + } + self.progress_info_callback(progress_info) + + def close(self): + """Close progress bar.""" + if self.pbar is not None: + self.pbar.close() + self.pbar = None + + +def research_progress_total(core_config: 'CoreConfig', + target_resource_utilization: 'ResourceUtilization' = None, + gptq_config: 'GradientPTQConfig' = None) -> int: + """ + Check whether specific processing will be executed based on input arguments + and calculate the total number of processing steps. 
+ + Processing step breakdown: + 1. Preprocessing (required) + 2. Statistics calculation (required) + 3. Weight parameter calculation (required) + 4. Hessian calculation (when GPTQ or specific settings enabled) + 5. MP calculation (when Mixed Precision enabled) + 6. Post-processing ~ conversion to exportable model (required) + + Args: + core_config: CoreConfig object. + target_resource_utilization: ResourceUtilization object (used for Mixed Precision determination). + gptq_config: GPTQ configuration object. + + Returns: + Total number of processing steps. + """ + # Base required steps: preprocessing, statistics, weight params, post-processing + total_steps = DEFAULT_TOTAL_STEP + + # Add MP calculation step (when Mixed Precision enabled) + if target_resource_utilization is not None and \ + target_resource_utilization.is_any_restricted(): + total_steps += 1 + + # Add Hessian step (when Mixed Precision with Hessian enabled) + if core_config.mixed_precision_config is not None and \ + core_config.mixed_precision_config.use_hessian_based_scores: + total_steps += 1 + + # Add GPTQ training step (when GPTQ is enabled) + if gptq_config is not None: + total_steps += 1 + + # Add Hessian step (when GPTQ with Hessian enabled; guard gptq_config against None) + if gptq_config is not None and gptq_config.hessian_weights_config is not None: + total_steps += 1 + + return total_steps diff --git a/model_compression_toolkit/core/common/quantization/debug_config.py b/model_compression_toolkit/core/common/quantization/debug_config.py index 2f0ccde7a..1abefa11d 100644 --- a/model_compression_toolkit/core/common/quantization/debug_config.py +++ b/model_compression_toolkit/core/common/quantization/debug_config.py @@ -14,7 +14,7 @@ # ============================================================================== from dataclasses import dataclass, field -from typing import List +from typing import List, Callable from model_compression_toolkit.core.common.network_editors.edit_network import EditRule @@ -30,9 +30,83 @@ class DebugConfig: network_editor 
(List[EditRule]): A list of rules and actions to edit the network for quantization. simulate_scheduler (bool): Simulate scheduler behavior to compute operators' order and cuts. bypass (bool): A flag to enable MCT bypass, which skips MCT runner and returns the input model unchanged. + progress_info_callback (Callable): A user-defined callback function for retrieving progress information. + + About progress_info_callback + + The `progress_info_callback` parameter in `DebugConfig` enables the following features and allows users to retrieve progress information when a callback function is configured: + + - The callback function can receive MCT progress information. + - A progress bar is displayed in the CUI, allowing users to visualize how much processing has been completed while MCT is running. + + If no callback function is set, these features are disabled and the behavior and output remain unchanged. + Examples of how to create a callback function to enable these features are provided in the Examples section. + + Examples: + + Create a callable callback function. + When defining the callback, make sure it accepts a dictionary representing the current processing state as an argument. + + Example 1: Use a class to keep track of the processing history. + + >>> class ProgressInfoCallback: + ... def __init__(self): + ... self.history = [] + ... + ... def __call__(self, info): + ... current = info["currentComponent"] + ... total = info["totalComponents"] + ... component_name = info["completedComponents"] + ... + ... self.history.append({ + ... "component_name": component_name, + ... "current": current, + ... "total": total + ... }) + ... + >>> progress_info_callback = ProgressInfoCallback() + + + Example 2: Use a function to output the progress percentage and processing name to standard error (stderr). + + >>> def progress_info_callback(info): + ... current = info["currentComponent"] + ... total = info["totalComponents"] + ... 
component_name = info["completedComponents"] + ... + ... progress_percent = (current / total * 100.0) + ... + ... print(f"[{current}/{total}] {progress_percent:6.2f}% {component_name}", + ... file=__import__('sys').stderr, flush=True) + + From the processing state dictionary, you can retrieve information using the following keys: + + .. list-table:: Keys in the processing state dictionary + :header-rows: 1 + + * - Parameter Key + - Value Type + - Description + * - "currentComponent" + - int + - Current processing step + * - "totalComponents" + - int + - Total number of processing steps + * - "completedComponents" + - str + - Name of the component currently being processed + + Import MCT and configure DebugConfig with the callback function you created. + Configure CoreConfig with this DebugConfig and use it. + + >>> import model_compression_toolkit as mct + >>> debug_config = mct.core.DebugConfig(progress_info_callback=progress_info_callback) + >>> core_config = mct.core.CoreConfig(debug_config=debug_config) """ analyze_similarity: bool = False network_editor: List[EditRule] = field(default_factory=list) simulate_scheduler: bool = False bypass: bool = False + progress_info_callback: Callable = None diff --git a/model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_computation.py b/model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_computation.py index ca8cdbfd7..136db0937 100644 --- a/model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_computation.py +++ b/model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_computation.py @@ -28,6 +28,8 @@ from model_compression_toolkit.core.common.quantization.quantization_params_generation.qparams_weights_computation import \ get_weights_qparams from model_compression_toolkit.logger import Logger +from model_compression_toolkit.core.common.progress_config.progress_info_controller 
import \ + ProgressInfoController def _collect_nodes_for_hmse(nodes_list: List[BaseNode], graph: Graph) -> List[BaseNode]: @@ -60,7 +62,8 @@ def calculate_quantization_params(graph: Graph, repr_data_gen_fn: Callable[[], Generator], nodes: List[BaseNode] = None, hessian_info_service: HessianInfoService = None, - num_hessian_samples: int = NUM_QPARAM_HESSIAN_SAMPLES): + num_hessian_samples: int = NUM_QPARAM_HESSIAN_SAMPLES, + progress_info_controller: ProgressInfoController = None): """ For a graph, go over its nodes, compute quantization params (for both weights and activations according to the given framework info), and create and attach a NodeQuantizationConfig to each node (containing the @@ -75,6 +78,7 @@ def calculate_quantization_params(graph: Graph, nodes: List of nodes to compute their thresholds instead of computing it for all nodes in the graph. hessian_info_service: HessianInfoService object for retrieving Hessian-based scores (used only with HMSE error method). num_hessian_samples: Number of samples to approximate Hessian-based scores on (used only with HMSE error method). + progress_info_controller: ProgressInfoController to display and manage overall progress information. """ Logger.info(f"\nRunning quantization parameters search. 
" @@ -97,6 +101,9 @@ def calculate_quantization_params(graph: Graph, target_nodes=nodes_for_hmse) hessian_info_service.fetch_hessian(request) + if progress_info_controller is not None: + progress_info_controller.set_description('Calculate Quantization Parameters') + for n in tqdm(nodes_list, "Calculating quantization parameters"): # iterate only nodes that we should compute their thresholds for candidate_qc in n.candidates_quantization_cfg: for attr in n.get_node_weights_attributes(): diff --git a/model_compression_toolkit/core/quantization_prep_runner.py b/model_compression_toolkit/core/quantization_prep_runner.py index 3b03ad2bb..6aab979ee 100644 --- a/model_compression_toolkit/core/quantization_prep_runner.py +++ b/model_compression_toolkit/core/quantization_prep_runner.py @@ -32,6 +32,8 @@ from model_compression_toolkit.core.common.substitutions.apply_substitutions import substitute from model_compression_toolkit.core.common.visualization.tensorboard_writer import TensorboardWriter +from model_compression_toolkit.core.common.progress_config.progress_info_controller import \ + ProgressInfoController def quantization_preparation_runner(graph: Graph, @@ -40,7 +42,8 @@ def quantization_preparation_runner(graph: Graph, fw_info: FrameworkInfo, fw_impl: FrameworkImplementation, tb_w: TensorboardWriter = None, - hessian_info_service: HessianInfoService = None, ) -> Graph: + hessian_info_service: HessianInfoService = None, + progress_info_controller: ProgressInfoController = None) -> Graph: """ Prepares a trained model for post-training quantization. First, the model graph is optimized using several transformations (e.g. folding BatchNormalization to preceding layers). @@ -58,6 +61,7 @@ def quantization_preparation_runner(graph: Graph, fw_impl: FrameworkImplementation object with a specific framework methods implementation. tb_w: TensorboardWriter object for logging hessian_info_service: HessianInfoService object for retrieving Hessian-based scores. 
+ progress_info_controller: ProgressInfoController to display and manage overall progress information. Returns: Graph object that represents the model, contains thresholds, and ready for quantization. @@ -66,6 +70,9 @@ def quantization_preparation_runner(graph: Graph, ###################################### # Statistic collection ###################################### + if progress_info_controller is not None: + progress_info_controller.set_description('Statistics Collection') + mi = ModelCollector(graph, fw_impl, fw_info, @@ -92,7 +99,8 @@ def quantization_preparation_runner(graph: Graph, ###################################### calculate_quantization_params(graph, fw_impl=fw_impl, repr_data_gen_fn=representative_data_gen, - hessian_info_service=hessian_info_service) + hessian_info_service=hessian_info_service, + progress_info_controller=progress_info_controller) if tb_w is not None: tb_w.add_graph(graph, 'thresholds_selection') diff --git a/model_compression_toolkit/core/runner.py b/model_compression_toolkit/core/runner.py index 8226f59e6..53a380184 100644 --- a/model_compression_toolkit/core/runner.py +++ b/model_compression_toolkit/core/runner.py @@ -36,6 +36,8 @@ from model_compression_toolkit.core.common.quantization.core_config import CoreConfig from model_compression_toolkit.core.common.visualization.tensorboard_writer import TensorboardWriter, \ finalize_bitwidth_in_tb +from model_compression_toolkit.core.common.progress_config.progress_info_controller import \ + ProgressInfoController from model_compression_toolkit.core.graph_prep_runner import graph_preparation_runner from model_compression_toolkit.core.quantization_prep_runner import quantization_preparation_runner from model_compression_toolkit.logger import Logger @@ -51,7 +53,8 @@ def core_runner(in_model: Any, fqc: FrameworkQuantizationCapabilities, target_resource_utilization: ResourceUtilization = None, running_gptq: bool = False, - tb_w: TensorboardWriter = None): + tb_w: TensorboardWriter = None, + 
progress_info_controller: ProgressInfoController = None): """ Quantize a trained model using post-training quantization. First, the model graph is optimized using several transformations (e.g. folding BatchNormalization to preceding @@ -72,11 +75,14 @@ def core_runner(in_model: Any, the attached framework operator's information. target_resource_utilization: ResourceUtilization to constraint the search of the mixed-precision configuration for the model. tb_w: TensorboardWriter object for logging + progress_info_controller: ProgressInfoController to display and manage overall progress information. Returns: An internal graph representation of the input model. """ + if progress_info_controller is not None: + progress_info_controller.set_description('MCT Graph Preprocessing') # Warn is representative dataset has batch-size == 1 batch_data = next(iter(representative_data_gen())) @@ -115,7 +121,8 @@ def core_runner(in_model: Any, fw_info=fw_info, fw_impl=fw_impl, tb_w=tb_w, - hessian_info_service=hessian_info_service) + hessian_info_service=hessian_info_service, + progress_info_controller=progress_info_controller) ###################################### # Finalize bit widths @@ -130,7 +137,8 @@ def core_runner(in_model: Any, target_resource_utilization, core_config.mixed_precision_config, representative_data_gen, - hessian_info_service=hessian_info_service) + hessian_info_service=hessian_info_service, + progress_info_controller=progress_info_controller) else: Logger.warning( f'Mixed Precision has overwrite bit-width configuration{core_config.mixed_precision_config.configuration_overwrite}') diff --git a/model_compression_toolkit/gptq/common/gptq_training.py b/model_compression_toolkit/gptq/common/gptq_training.py index 8c6a4168e..38fb4bcc9 100644 --- a/model_compression_toolkit/gptq/common/gptq_training.py +++ b/model_compression_toolkit/gptq/common/gptq_training.py @@ -31,6 +31,8 @@ get_gradual_activation_quantizer_wrapper_factory from 
model_compression_toolkit.gptq.common.regularization_factory import get_regularization from model_compression_toolkit.logger import Logger +from model_compression_toolkit.core.common.progress_config.progress_info_controller import \ + ProgressInfoController from model_compression_toolkit.trainable_infrastructure.common.util import get_total_grad_steps @@ -46,7 +48,8 @@ def __init__(self, fw_impl: GPTQFrameworkImplemantation, fw_info: FrameworkInfo, representative_data_gen_fn: Callable[[], Generator], - hessian_info_service: HessianInfoService = None): + hessian_info_service: HessianInfoService = None, + progress_info_controller: ProgressInfoController = None): """ Build two models from a graph: A teacher network (float model) and a student network (quantized model). Use the dataset generator to pass images through the teacher and student networks to get intermediate @@ -61,6 +64,7 @@ def __init__(self, fw_info: Framework information representative_data_gen_fn: factory for representative data generator. hessian_info_service: HessianInfoService for fetching and computing Hessian-approximation information. + progress_info_controller: ProgressInfoController to display and manage overall progress information. 
""" self.graph_float = copy.deepcopy(graph_float) self.graph_quant = copy.deepcopy(graph_quant) @@ -68,6 +72,7 @@ def __init__(self, self.fw_impl = fw_impl self.fw_info = fw_info self.representative_data_gen_fn = representative_data_gen_fn + self.progress_info_controller = progress_info_controller def _get_total_grad_steps(): return get_total_grad_steps(representative_data_gen_fn) * gptq_config.n_epochs @@ -131,6 +136,10 @@ def _get_total_grad_steps(): [len(optimizer_params_tuple[1]) for optimizer_params_tuple in self.optimizer_with_param]) > 0 self.use_sample_layer_attention = hessian_cfg and hessian_cfg.per_sample + if hessian_cfg: + if self.progress_info_controller is not None: + self.progress_info_controller.set_description('Compute Hessian for GPTQ') + if self.use_sample_layer_attention: # normalization is currently not supported, make sure the config reflects it. if hessian_cfg.norm_scores or hessian_cfg.log_norm or hessian_cfg.scale_log_norm: @@ -289,7 +298,8 @@ def gptq_training(graph_float: Graph, representative_data_gen: Callable, fw_impl: GPTQFrameworkImplemantation, fw_info: FrameworkInfo, - hessian_info_service: HessianInfoService = None) -> Graph: + hessian_info_service: HessianInfoService = None, + progress_info_controller: ProgressInfoController = None) -> Graph: """ GPTQ training process using knowledge distillation with a teacher network (float model) and a student network (quantized model). Args: @@ -300,6 +310,7 @@ def gptq_training(graph_float: Graph, fw_impl: Framework implementation fw_info: Framework information hessian_info_service: HessianInfoService to fetch information based on the Hessian approximation. + progress_info_controller: ProgressInfoController to display and manage overall progress information. 
Returns: Quantized graph for export @@ -314,9 +325,12 @@ def gptq_training(graph_float: Graph, fw_impl, fw_info, representative_data_gen, - hessian_info_service=hessian_info_service) + hessian_info_service=hessian_info_service, + progress_info_controller=progress_info_controller) # Training process + if progress_info_controller is not None: + progress_info_controller.set_description('Train with GPTQ') gptq_trainer.train() # Update graph diff --git a/model_compression_toolkit/gptq/keras/gptq_training.py b/model_compression_toolkit/gptq/keras/gptq_training.py index 1e135ff21..b77a99cff 100644 --- a/model_compression_toolkit/gptq/keras/gptq_training.py +++ b/model_compression_toolkit/gptq/keras/gptq_training.py @@ -54,6 +54,9 @@ import copy from model_compression_toolkit.core.keras.constants import BIAS, USE_BIAS from model_compression_toolkit.gptq.keras.quantizer.soft_rounding.soft_quantizer_reg import SoftQuantizerRegularization +from model_compression_toolkit.core.common.progress_config.progress_info_controller import \ + ProgressInfoController + class KerasGPTQTrainer(GPTQTrainer): """ @@ -67,7 +70,8 @@ def __init__(self, fw_impl: FrameworkImplementation, fw_info: FrameworkInfo, representative_data_gen: Callable, - hessian_info_service: HessianInfoService = None): + hessian_info_service: HessianInfoService = None, + progress_info_controller: ProgressInfoController = None): """ Build two models from a graph: A teacher network (float model) and a student network (quantized model). Use the dataset generator to pass images through the teacher and student networks to get intermediate @@ -82,6 +86,7 @@ def __init__(self, fw_info: Framework information. representative_data_gen: Dataset to use for inputs of the models. hessian_info_service: HessianScoresService for fetching and computing Hessian's approximation scores. + progress_info_controller: ProgressInfoController to display and manage overall progress information. 
""" @@ -96,7 +101,8 @@ def __init__(self, fw_impl, fw_info, representative_data_gen_fn=representative_data_gen, - hessian_info_service=hessian_info_service) + hessian_info_service=hessian_info_service, + progress_info_controller=progress_info_controller) def _prepare_train_dataloader_sla(self, data_gen_fn: Callable[[], Generator]) -> tf.data.Dataset: diff --git a/model_compression_toolkit/gptq/keras/quantization_facade.py b/model_compression_toolkit/gptq/keras/quantization_facade.py index 0726e516c..54bb61d26 100644 --- a/model_compression_toolkit/gptq/keras/quantization_facade.py +++ b/model_compression_toolkit/gptq/keras/quantization_facade.py @@ -33,6 +33,8 @@ from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import ResourceUtilization from model_compression_toolkit.core.common.mixed_precision.mixed_precision_quantization_config import MixedPrecisionQuantizationConfig from model_compression_toolkit.core import CoreConfig +from model_compression_toolkit.core.common.progress_config.progress_info_controller import \ + ProgressInfoController, research_progress_total from model_compression_toolkit.core.runner import core_runner from model_compression_toolkit.gptq.runner import gptq_runner from model_compression_toolkit.core.analyzer import analyzer_model_quantization @@ -253,6 +255,12 @@ def keras_gradient_post_training_quantization(in_model: Model, representative_da target_platform_capabilities, custom_opset2layer=core_config.quantization_config.custom_tpc_opset_to_layer) + progress_info_controller = ProgressInfoController( + total_step=research_progress_total(core_config, target_resource_utilization, gptq_config), + description="MCT Keras GPTQ Progress", + progress_info_callback=core_config.debug_config.progress_info_callback + ) + tg, bit_widths_config, hessian_info_service, scheduling_info = core_runner(in_model=in_model, representative_data_gen=representative_data_gen, core_config=core_config, @@ -261,7 
+269,8 @@ def keras_gradient_post_training_quantization(in_model: Model, representative_da fqc=framework_platform_capabilities, target_resource_utilization=target_resource_utilization, tb_w=tb_w, - running_gptq=True) + running_gptq=True, + progress_info_controller=progress_info_controller) float_graph = copy.deepcopy(tg) @@ -273,10 +282,14 @@ def keras_gradient_post_training_quantization(in_model: Model, representative_da DEFAULT_KERAS_INFO, fw_impl, tb_w, - hessian_info_service=hessian_info_service) + hessian_info_service=hessian_info_service, + progress_info_controller=progress_info_controller) del hessian_info_service + if progress_info_controller is not None: + progress_info_controller.set_description("MCT Graph Finalization") + if core_config.debug_config.analyze_similarity: analyzer_model_quantization(representative_data_gen, tb_w, @@ -290,6 +303,10 @@ def keras_gradient_post_training_quantization(in_model: Model, representative_da exportable_model = add_metadata(exportable_model, create_model_metadata(fqc=framework_platform_capabilities, scheduling_info=scheduling_info)) + + if progress_info_controller is not None: + progress_info_controller.close() + return exportable_model, user_info else: diff --git a/model_compression_toolkit/gptq/pytorch/gptq_training.py b/model_compression_toolkit/gptq/pytorch/gptq_training.py index 6b7b0378b..7d9c9b8dc 100644 --- a/model_compression_toolkit/gptq/pytorch/gptq_training.py +++ b/model_compression_toolkit/gptq/pytorch/gptq_training.py @@ -42,6 +42,8 @@ from model_compression_toolkit.gptq.pytorch.quantizer.soft_rounding.soft_quantizer_reg import SoftQuantizerRegularization as PytorchSoftQuantizerRegularization from model_compression_toolkit.logger import Logger +from model_compression_toolkit.core.common.progress_config.progress_info_controller import \ + ProgressInfoController class PytorchGPTQTrainer(GPTQTrainer): @@ -56,7 +58,8 @@ def __init__(self, fw_impl: FrameworkImplementation, fw_info: FrameworkInfo, 
representative_data_gen: Callable, - hessian_info_service: HessianInfoService = None): + hessian_info_service: HessianInfoService = None, + progress_info_controller: ProgressInfoController = None): """ Build two models from a graph: A teacher network (float model) and a student network (quantized model). Use the dataset generator to pass images through the teacher and student networks to get intermediate @@ -71,6 +74,7 @@ def __init__(self, fw_info: Framework information representative_data_gen: Dataset to use for inputs of the models. hessian_info_service: HessianInfoService to fetch info based on the hessian approximation of the float model. + progress_info_controller: ProgressInfoController to display and manage overall progress information. """ self.fw_soft_quantizer_regularization = PytorchSoftQuantizerRegularization self.fw_linear_annealing_scheduler = PytorchLinearAnnealingScheduler @@ -83,7 +87,8 @@ def __init__(self, fw_impl, fw_info, representative_data_gen_fn=representative_data_gen, - hessian_info_service=hessian_info_service) + hessian_info_service=hessian_info_service, + progress_info_controller=progress_info_controller) def _prepare_train_dataloader_sla(self, data_gen_fn: Callable[[], Generator]) -> DataLoader: diff --git a/model_compression_toolkit/gptq/pytorch/quantization_facade.py b/model_compression_toolkit/gptq/pytorch/quantization_facade.py index 22fcc61a0..d19894fa1 100644 --- a/model_compression_toolkit/gptq/pytorch/quantization_facade.py +++ b/model_compression_toolkit/gptq/pytorch/quantization_facade.py @@ -22,6 +22,8 @@ MixedPrecisionQuantizationConfig from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import \ ResourceUtilization +from model_compression_toolkit.core.common.progress_config.progress_info_controller import \ + ProgressInfoController, research_progress_total from model_compression_toolkit.core.common.user_info import UserInformation from 
model_compression_toolkit.core.common.visualization.tensorboard_writer import init_tensorboard_writer from model_compression_toolkit.core.runner import core_runner @@ -226,6 +228,12 @@ def pytorch_gradient_post_training_quantization(model: Module, framework_quantization_capabilities = attach2pytorch.attach(target_platform_capabilities, core_config.quantization_config.custom_tpc_opset_to_layer) + progress_info_controller = ProgressInfoController( + total_step=research_progress_total(core_config, target_resource_utilization, gptq_config), + description="MCT PyTorch GPTQ Progress", + progress_info_callback=core_config.debug_config.progress_info_callback + ) + # ---------------------- # # Core Runner # ---------------------- # @@ -237,7 +245,8 @@ def pytorch_gradient_post_training_quantization(model: Module, fqc=framework_quantization_capabilities, target_resource_utilization=target_resource_utilization, tb_w=tb_w, - running_gptq=True) + running_gptq=True, + progress_info_controller=progress_info_controller) float_graph = copy.deepcopy(graph) @@ -252,7 +261,11 @@ def pytorch_gradient_post_training_quantization(model: Module, DEFAULT_PYTORCH_INFO, fw_impl, tb_w, - hessian_info_service=hessian_info_service) + hessian_info_service=hessian_info_service, + progress_info_controller=progress_info_controller) + + if progress_info_controller is not None: + progress_info_controller.set_description("MCT Graph Finalization") if core_config.debug_config.analyze_similarity: analyzer_model_quantization(representative_data_gen, @@ -267,6 +280,10 @@ def pytorch_gradient_post_training_quantization(model: Module, exportable_model = add_metadata(exportable_model, create_model_metadata(fqc=framework_quantization_capabilities, scheduling_info=scheduling_info)) + + if progress_info_controller is not None: + progress_info_controller.close() + return exportable_model, user_info diff --git a/model_compression_toolkit/gptq/runner.py b/model_compression_toolkit/gptq/runner.py index 
9d1fbc65f..b9238c717 100644 --- a/model_compression_toolkit/gptq/runner.py +++ b/model_compression_toolkit/gptq/runner.py @@ -30,6 +30,8 @@ from model_compression_toolkit.core.common.statistics_correction.apply_bias_correction_to_graph import \ apply_bias_correction_to_graph from model_compression_toolkit.logger import Logger +from model_compression_toolkit.core.common.progress_config.progress_info_controller import \ + ProgressInfoController def _apply_gptq(gptq_config: GradientPTQConfig, @@ -39,7 +41,8 @@ def _apply_gptq(gptq_config: GradientPTQConfig, tg_bias: Graph, fw_info: FrameworkInfo, fw_impl: FrameworkImplementation, - hessian_info_service: HessianInfoService = None) -> Graph: + hessian_info_service: HessianInfoService = None, + progress_info_controller: ProgressInfoController = None) -> Graph: """ Apply GPTQ to improve accuracy of quantized model. Build two models from a graph: A teacher network (float model) and a student network (quantized model). @@ -55,6 +58,7 @@ def _apply_gptq(gptq_config: GradientPTQConfig, fw_info: Information needed for quantization about the specific framework (e.g., kernel channels indices, groups of layers by how they should be quantized, etc.). fw_impl: Framework implementation per framework hessian_info_service: HessianInfoService to fetch information based on the hessian approximation for the float model. + progress_info_controller: ProgressInfoController to display and manage overall progress information. 
Returns: """ @@ -65,7 +69,8 @@ def _apply_gptq(gptq_config: GradientPTQConfig, representative_data_gen, fw_impl, fw_info, - hessian_info_service=hessian_info_service) + hessian_info_service=hessian_info_service, + progress_info_controller=progress_info_controller) if tb_w is not None: tb_w.add_graph(tg_bias, 'after_gptq') @@ -80,7 +85,8 @@ def gptq_runner(tg: Graph, fw_info: FrameworkInfo, fw_impl: FrameworkImplementation, tb_w: TensorboardWriter, - hessian_info_service: HessianInfoService = None) -> Graph: + hessian_info_service: HessianInfoService = None, + progress_info_controller: ProgressInfoController = None) -> Graph: """ Quantize a graph that has final weights candidates quantization configurations. Before we quantize the graph weights, we apply GPTQ to get an improved graph. @@ -95,6 +101,7 @@ def gptq_runner(tg: Graph, fw_impl: FrameworkImplementation object with a specific framework methods implementation. tb_w: A TensorBoardWriter object initialized with the logger dir path if it was set, or None otherwise. hessian_info_service: HessianScoresService to fetch approximations of the hessian scores for the float model. + progress_info_controller: ProgressInfoController to display and manage overall progress information. Returns: A graph after model weights GPTQ fine-tuning. 
@@ -119,6 +126,7 @@ def gptq_runner(tg: Graph, tg_bias, fw_info, fw_impl, - hessian_info_service=hessian_info_service) + hessian_info_service=hessian_info_service, + progress_info_controller=progress_info_controller) return tg_gptq diff --git a/model_compression_toolkit/ptq/keras/quantization_facade.py b/model_compression_toolkit/ptq/keras/quantization_facade.py index 8ddcba218..2732b602f 100644 --- a/model_compression_toolkit/ptq/keras/quantization_facade.py +++ b/model_compression_toolkit/ptq/keras/quantization_facade.py @@ -28,6 +28,8 @@ from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import ResourceUtilization from model_compression_toolkit.core.common.mixed_precision.mixed_precision_quantization_config import \ MixedPrecisionQuantizationConfig +from model_compression_toolkit.core.common.progress_config.progress_info_controller import \ + ProgressInfoController, research_progress_total from model_compression_toolkit.core.runner import core_runner from model_compression_toolkit.ptq.runner import ptq_runner from model_compression_toolkit.metadata import create_model_metadata @@ -147,6 +149,12 @@ def keras_post_training_quantization(in_model: Model, target_platform_capabilities, custom_opset2layer=core_config.quantization_config.custom_tpc_opset_to_layer) + progress_info_controller = ProgressInfoController( + total_step=research_progress_total(core_config, target_resource_utilization), + description="MCT Keras PTQ Progress", + progress_info_callback=core_config.debug_config.progress_info_callback + ) + # Ignore returned hessian service as PTQ does not use it tg, bit_widths_config, _, scheduling_info = core_runner(in_model=in_model, representative_data_gen=representative_data_gen, @@ -155,7 +163,8 @@ def keras_post_training_quantization(in_model: Model, fw_impl=fw_impl, fqc=framework_platform_capabilities, target_resource_utilization=target_resource_utilization, - tb_w=tb_w) + tb_w=tb_w, + 
progress_info_controller=progress_info_controller) # At this point, tg is a graph that went through substitutions (such as BN folding) and is # ready for quantization (namely, it holds quantization params, etc.) but the weights are @@ -171,6 +180,9 @@ def keras_post_training_quantization(in_model: Model, fw_impl, tb_w) + if progress_info_controller is not None: + progress_info_controller.set_description("MCT Graph Finalization") + if core_config.debug_config.analyze_similarity: quantized_graph = quantize_graph_weights(graph_with_stats_correction) analyzer_model_quantization(representative_data_gen, @@ -185,6 +197,10 @@ def keras_post_training_quantization(in_model: Model, exportable_model = add_metadata(exportable_model, create_model_metadata(fqc=framework_platform_capabilities, scheduling_info=scheduling_info)) + + if progress_info_controller is not None: + progress_info_controller.close() + return exportable_model, user_info diff --git a/model_compression_toolkit/ptq/pytorch/quantization_facade.py b/model_compression_toolkit/ptq/pytorch/quantization_facade.py index 26ab2f796..890e895bd 100644 --- a/model_compression_toolkit/ptq/pytorch/quantization_facade.py +++ b/model_compression_toolkit/ptq/pytorch/quantization_facade.py @@ -26,6 +26,8 @@ from model_compression_toolkit.core import CoreConfig from model_compression_toolkit.core.common.mixed_precision.mixed_precision_quantization_config import \ MixedPrecisionQuantizationConfig +from model_compression_toolkit.core.common.progress_config.progress_info_controller import \ + ProgressInfoController, research_progress_total from model_compression_toolkit.core.runner import core_runner from model_compression_toolkit.ptq.runner import ptq_runner from model_compression_toolkit.core.analyzer import analyzer_model_quantization @@ -119,6 +121,12 @@ def pytorch_post_training_quantization(in_module: Module, framework_platform_capabilities = attach2pytorch.attach(target_platform_capabilities, 
core_config.quantization_config.custom_tpc_opset_to_layer) + progress_info_controller = ProgressInfoController( + total_step=research_progress_total(core_config, target_resource_utilization), + description="MCT PyTorch PTQ Progress", + progress_info_callback=core_config.debug_config.progress_info_callback + ) + # Ignore hessian info service as it is not used here yet. tg, bit_widths_config, _, scheduling_info = core_runner(in_model=in_module, representative_data_gen=representative_data_gen, @@ -127,7 +135,8 @@ def pytorch_post_training_quantization(in_module: Module, fw_impl=fw_impl, fqc=framework_platform_capabilities, target_resource_utilization=target_resource_utilization, - tb_w=tb_w) + tb_w=tb_w, + progress_info_controller=progress_info_controller) # At this point, tg is a graph that went through substitutions (such as BN folding) and is # ready for quantization (namely, it holds quantization params, etc.) but the weights are @@ -143,6 +152,9 @@ def pytorch_post_training_quantization(in_module: Module, fw_impl, tb_w) + if progress_info_controller is not None: + progress_info_controller.set_description("MCT Graph Finalization") + if core_config.debug_config.analyze_similarity: quantized_graph = quantize_graph_weights(graph_with_stats_correction) analyzer_model_quantization(representative_data_gen, @@ -157,6 +169,10 @@ def pytorch_post_training_quantization(in_module: Module, exportable_model = add_metadata(exportable_model, create_model_metadata(fqc=framework_platform_capabilities, scheduling_info=scheduling_info)) + + if progress_info_controller is not None: + progress_info_controller.close() + return exportable_model, user_info diff --git a/tests_pytest/common_tests/unit_tests/core/mixed_precision/sensitivity_eval/test_sensitivity_evaluator.py b/tests_pytest/common_tests/unit_tests/core/mixed_precision/sensitivity_eval/test_sensitivity_evaluator.py index fbe7456b5..d2b4c3c1e 100644 --- 
a/tests_pytest/common_tests/unit_tests/core/mixed_precision/sensitivity_eval/test_sensitivity_evaluator.py +++ b/tests_pytest/common_tests/unit_tests/core/mixed_precision/sensitivity_eval/test_sensitivity_evaluator.py @@ -48,8 +48,9 @@ def init(s, *args, **kwargs): kwargs = dict(custom_metric_fn=Mock()) if custom else {} mp_config = MixedPrecisionQuantizationConfig(**kwargs) hessian_mock = Mock() # we only check the object is passed to calculator as is + progress_info_mock = Mock() # we only check the object is passed to calculator as is se = SensitivityEvaluation(graph_mock, mp_config, repr_datagen, fw_info=fw_info_mock, fw_impl=fw_impl_mock, - hessian_info_service=hessian_mock) + hessian_info_service=hessian_mock, progress_info_controller=progress_info_mock) # compare exact types in case there is inheritance between calculators assert type(se.metric_calculator) is calc_type @@ -57,7 +58,8 @@ def init(s, *args, **kwargs): init_spy.assert_called_once_with(graph_mock, mp_config.custom_metric_fn) else: init_spy.assert_called_once_with(graph_mock, mp_config, repr_datagen, fw_info=fw_info_mock, - fw_impl=fw_impl_mock, hessian_info_service=hessian_mock) + fw_impl=fw_impl_mock, hessian_info_service=hessian_mock, + progress_info_controller=progress_info_mock) build_mp_model_mock.assert_called_with(graph_mock, [1, 2, 3], False) assert se.mp_model == build_mp_model_mock.return_value[0] diff --git a/tests_pytest/common_tests/unit_tests/core/progress_config/test_progress_control_module.py b/tests_pytest/common_tests/unit_tests/core/progress_config/test_progress_control_module.py new file mode 100644 index 000000000..2e51dac16 --- /dev/null +++ b/tests_pytest/common_tests/unit_tests/core/progress_config/test_progress_control_module.py @@ -0,0 +1,169 @@ +# Copyright 2026 Sony Semiconductor Solutions, Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +import pytest + +from typing import Callable +from tqdm import tqdm + +from model_compression_toolkit.core.common.quantization.debug_config import DebugConfig +from model_compression_toolkit.core.common.progress_config.progress_info_controller import \ + ProgressInfoController +from model_compression_toolkit.core.common.progress_config.constants import \ + COMPLETED_COMPONENTS, TOTAL_COMPONENTS, CURRENT_COMPONENT + + +def check_callback_function(info): + pass + + +class CheckCallBackFunction: + def __init__(self): + self.history = [] + self.count = 0 + + def __call__(self, info): + self.history.append({ + COMPLETED_COMPONENTS: info[COMPLETED_COMPONENTS], + TOTAL_COMPONENTS: info[TOTAL_COMPONENTS], + CURRENT_COMPONENT: info[CURRENT_COMPONENT], + }) + self.count += 1 + + +class TestProgessInfoController: + + ### Initialization Test + @pytest.mark.parametrize( + "total_step, callback_function, expected", + [ + pytest.param(-1, None, None, id="unset_callback_and_no_steps"), + pytest.param(1, None, None, id="unset_callback_and_with_steps"), + pytest.param(0, CheckCallBackFunction(), None, id="set_callback_and_no_steps"), + pytest.param(2, CheckCallBackFunction(), ProgressInfoController, id="set_callback_and_steps"), + pytest.param(2, check_callback_function, ProgressInfoController, id="set_callback_function_and_steps"), + ], + ) + def test_progress_info_controller_initalize(self, total_step, callback_function, expected): + controller = ProgressInfoController( + 
total_step=total_step, + progress_info_callback=callback_function, + description='Unit Test' + ) + + if expected is None: + ### Expected value verification (None) + assert controller is expected + else: + ### Expected value verification (ProgressInfoController) + assert isinstance(controller, expected) + assert isinstance(controller.pbar, tqdm) + + ### Verify the initialization of class member variables + assert controller.total_step == total_step + assert controller.current_step == 0 + assert controller.description == 'Unit Test' + assert callable(controller.progress_info_callback) + + ### Initialization Invalid Test + @pytest.mark.parametrize( + "callback_function", + [ + pytest.param(30, id="set_type_is_int"), + pytest.param('callback', id="set_type_is_str"), + pytest.param([check_callback_function], id="set_type_is_list"), + ], + ) + def test_progress_info_controller_initalize_invalid(self, callback_function): + with pytest.raises(TypeError) as err_msg: + controller = ProgressInfoController( + total_step=1, + progress_info_callback=callback_function, + description='Initialization Invalid Test' + ) + + ### Verify assertion error message + assert str(err_msg.value) == \ + f"progress_info_callback must be a callable (function or callable instance)." 
+ + ### Normal Test + def test_progress_info_controller_update_description(self): + controller = ProgressInfoController( + total_step=2, + progress_info_callback=CheckCallBackFunction(), + ) + + controller.set_description("Preprocessing") + controller.set_description("Finalization") + + callback = controller.progress_info_callback + + ### Verify callback was called 2 times + assert callback.count == 2 + + ### Verify first call + assert callback.history[0][COMPLETED_COMPONENTS] == "Preprocessing" + assert callback.history[0][TOTAL_COMPONENTS] == 2 + assert callback.history[0][CURRENT_COMPONENT] == 1 + + ### Verify second call + assert callback.history[1][COMPLETED_COMPONENTS] == "Finalization" + assert callback.history[1][TOTAL_COMPONENTS] == 2 + assert callback.history[1][CURRENT_COMPONENT] == 2 + + controller.close() + + ### Verify pbar is closed + assert controller.pbar is None + + ### Invalid Test + def test_progress_info_controller_invalid_count_check(self): + controller = ProgressInfoController( + total_step=1, + progress_info_callback=CheckCallBackFunction(), + description='Invalid Test' + ) + + with pytest.raises(AssertionError) as err_msg: + controller.set_description("Preprocessing") + controller.set_description("Finalization") + + ### Verify assertion error message + assert str(err_msg.value) == \ + f"current_step: 2, exceeded total_step: 1." 
+ + ### Verify pbar is safely closed + assert controller.pbar is None + + ### Verify callback was called 1 time + callback = controller.progress_info_callback + assert callback.count == 1 + + ### DebugConfig Variable Test + @pytest.mark.parametrize( + "callback_function, expected", + [ + pytest.param(None, None, id="unset_callback"), + pytest.param(check_callback_function, Callable, id="set_callback_of_function"), + pytest.param(CheckCallBackFunction(), CheckCallBackFunction, id="set_callback_of_class"), + ], + ) + def test_adding_debug_config_menber_variable(self, callback_function, expected): + debug_config = DebugConfig(progress_info_callback=callback_function) + + if expected is None: + assert debug_config.progress_info_callback == expected + else: + assert callable(debug_config.progress_info_callback) diff --git a/tests_pytest/common_tests/unit_tests/core/progress_config/test_research_progress_total.py b/tests_pytest/common_tests/unit_tests/core/progress_config/test_research_progress_total.py new file mode 100644 index 000000000..534044506 --- /dev/null +++ b/tests_pytest/common_tests/unit_tests/core/progress_config/test_research_progress_total.py @@ -0,0 +1,148 @@ +# Copyright 2026 Sony Semiconductor Solutions, Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +import pytest +from unittest.mock import Mock + +from model_compression_toolkit.core.common.progress_config.progress_info_controller import \ + research_progress_total + + +MOCK_OBJ = Mock() + + +def mock_core_config( + mixed_precision_config=None +): + core_config = Mock() + core_config.mixed_precision_config = mixed_precision_config + core_config.is_mixed_precision_enabled = bool(mixed_precision_config) + + return core_config + + +def mock_mixed_precision_config( + use_hessian_based_scores=False +): + if use_hessian_based_scores is None: + mixed_precision_config = None + else: + mixed_precision_config = Mock() + mixed_precision_config.use_hessian_based_scores = use_hessian_based_scores + + return mixed_precision_config + + +def mock_gptq_config( + hessian_weights_config=None +): + gptq_config = Mock() + gptq_config.hessian_weights_config = hessian_weights_config + + return gptq_config + + +def mock_resource_utilization( + is_any_restricted=False +): + if is_any_restricted is None: + resource_utilization = None + else: + resource_utilization = Mock() + resource_utilization.is_any_restricted.return_value = is_any_restricted + + return resource_utilization + + +class TestResearchProgressTotal: + + ### PTQ (Single Precision) + @pytest.mark.parametrize( + "is_any_restricted, expected", + [ + pytest.param(None, 4, id="no_ru_flag_ptq_sp_base"), + pytest.param(False, 4, id="disable_ru_flag_ptq_sp_base"), + ], + ) + def test_ptq_sp(self, is_any_restricted, expected): + core_config = mock_core_config() + target_resource_utilization=mock_resource_utilization(is_any_restricted) + + result = research_progress_total( + core_config=core_config, + target_resource_utilization=target_resource_utilization + ) + assert result == expected + + ### PTQ (Mixed Precision) + @pytest.mark.parametrize( + "mp_hessian_enabled, expected", + [ + pytest.param(None, 5, id="unset_mp_cfg_ptq_mp"), + 
pytest.param(False, 5, id="mp_hessian_disable_ptq_mp"), + pytest.param(True, 6, id="mp_hessian_enable_ptq_mp"), + ], + ) + def test_ptq_mp(self, mp_hessian_enabled, expected): + core_config = mock_core_config(mixed_precision_config=mock_mixed_precision_config(mp_hessian_enabled)) + result = research_progress_total( + core_config=core_config, + target_resource_utilization=mock_resource_utilization(True), + ) + assert result == expected + + ### GPTQ (Single Precision) + @pytest.mark.parametrize( + "is_any_restricted, gptq_hessian_weights_config, expected", + [ + pytest.param(False, None, 5, id="disable_ru_flag_gptq_sp_enable_hessian"), + pytest.param(False, MOCK_OBJ, 6, id="disable_ru_flag_gptq_sp_disable_hessian"), + pytest.param(None, None, 5, id="no_ru_flag_gptq_sp_enable_hessian"), + pytest.param(None, MOCK_OBJ, 6, id="no_ru_flag_gptq_sp_disable_hessian"), + ], + ) + def test_gptq_sp(self, is_any_restricted, gptq_hessian_weights_config, expected): + core_config = mock_core_config() + gptq_config = mock_gptq_config(gptq_hessian_weights_config) + target_resource_utilization=mock_resource_utilization(is_any_restricted) + + result = research_progress_total(core_config=core_config, + gptq_config=gptq_config, + target_resource_utilization=target_resource_utilization) + assert result == expected + + ### GPTQ (Mixed Precision) + @pytest.mark.parametrize( + "mp_hessian_enabled, gptq_hessian_weights_config, expected", + [ + pytest.param(None, None, 6, id="unset_mp_cfg_and_hessian_w_cfg_gptq_mp"), + pytest.param(False, None, 6, id="all_disabled_hessian_gptq_mp"), + pytest.param(True, None, 7, id="enabled_mp_hessian_disabled_gptq_hessian"), + pytest.param(None, MOCK_OBJ, 7, id="unset_mp_cfg_and_set_hessian_w_cfg_gptq_mp"), + pytest.param(False, MOCK_OBJ, 7, id="disabled_mp_hessian_enabled_gptq_hessian"), + pytest.param(True, MOCK_OBJ, 8, id="all_enabled_hessian_gptq_mp"), + ], + ) + def test_gptq_mp(self, mp_hessian_enabled, gptq_hessian_weights_config, expected): + 
core_config = mock_core_config(mixed_precision_config=mock_mixed_precision_config(mp_hessian_enabled)) + target_resource_utilization = mock_resource_utilization(True) + gptq_config = mock_gptq_config(gptq_hessian_weights_config) + + result = research_progress_total( + core_config=core_config, + target_resource_utilization=target_resource_utilization, + gptq_config=gptq_config, + ) + assert result == expected diff --git a/tests_pytest/keras_tests/e2e_tests/test_progress_visualization_keras.py b/tests_pytest/keras_tests/e2e_tests/test_progress_visualization_keras.py new file mode 100644 index 000000000..dd2310783 --- /dev/null +++ b/tests_pytest/keras_tests/e2e_tests/test_progress_visualization_keras.py @@ -0,0 +1,123 @@ +# Copyright 2026 Sony Semiconductor Solutions, Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +import pytest + +import model_compression_toolkit as mct + +import tensorflow as tf +import tensorflow.keras as keras +import numpy as np + +tf.config.run_functions_eagerly(True) + + +class E2ETestProgressInfoCallback: + def __init__(self): + self.history = [] + + def __call__(self, info): + self.history.append(info) + + +def representative_data_gen(): + yield [np.random.randn(1, 8, 8, 3)] + + +class TestKerasProgressVisualization: + + def _build_test_model(self): + x = keras.layers.Input((8, 8, 3)) + y = keras.layers.Conv2D(filters=8, kernel_size=3)(x) + y = keras.layers.BatchNormalization()(y) + y = keras.layers.ReLU()(y) + return keras.Model(inputs=x, outputs=y) + + def _build_expected_prog_info(self, core_config, resource_utilization, gptq_config): + + expected_str_list = ["MCT Graph Preprocessing", "Statistics Collection", "Calculate Quantization Parameters"] + + if resource_utilization is not None and resource_utilization.is_any_restricted(): + if core_config.mixed_precision_config is not None and core_config.mixed_precision_config.use_hessian_based_scores: + expected_str_list.append("Compute Hessian for Mixed Precision") + expected_str_list.append("Research Mixed Precision") + + if gptq_config is not None: + if gptq_config.hessian_weights_config is not None: + expected_str_list.append("Compute Hessian for GPTQ") + expected_str_list.append("Train with GPTQ") + + expected_str_list.append("MCT Graph Finalization") + + expected_components = [ + { + "completedComponents": component, + "totalComponents": len(expected_str_list), + "currentComponent": idx, + } + for idx, component in enumerate(expected_str_list, start=1) + ] + + return expected_components + + @pytest.mark.parametrize('is_enable_gptq_hessian', [False, True]) + @pytest.mark.parametrize('is_enable_mp_hessian', [False, True]) + @pytest.mark.parametrize('is_enable_mp', [False, True]) + @pytest.mark.parametrize('q_method', 
['ptq', 'gptq']) + def test_keras_progress_visualization(self, q_method, is_enable_mp, is_enable_mp_hessian, is_enable_gptq_hessian): + if q_method == 'ptq' and is_enable_gptq_hessian: + pytest.skip("Skipping because the combination 'ptq' x 'gptq_hessian' is invalid.") + + float_model = self._build_test_model() + callback_func = E2ETestProgressInfoCallback() + + tpc = mct.get_target_platform_capabilities() + core_config = mct.core.CoreConfig(debug_config=mct.core.DebugConfig( + progress_info_callback=callback_func), + mixed_precision_config=mct.core.MixedPrecisionQuantizationConfig( + num_of_images=1, + use_hessian_based_scores=is_enable_mp_hessian)) + if is_enable_mp: + resource_utilization_data = mct.core.keras_resource_utilization_data(float_model, + representative_data_gen, + core_config=core_config, + target_platform_capabilities=tpc) + resource_utilization = mct.core.ResourceUtilization(weights_memory=resource_utilization_data.weights_memory * 0.9) + else: + resource_utilization = None + + if q_method == 'gptq': + gptq_config = mct.gptq.get_keras_gptq_config(n_epochs=3, + use_hessian_based_weights=is_enable_gptq_hessian, + use_hessian_sample_attention=is_enable_gptq_hessian) + else: + gptq_config = None + + + if q_method == 'ptq': + _, _ = mct.ptq.keras_post_training_quantization(in_model=float_model, + representative_data_gen=representative_data_gen, + target_resource_utilization=resource_utilization, + core_config=core_config, + target_platform_capabilities=tpc) + elif q_method == 'gptq': + _, _ = mct.gptq.keras_gradient_post_training_quantization(in_model=float_model, + representative_data_gen=representative_data_gen, + target_resource_utilization=resource_utilization, + gptq_config=gptq_config, + core_config=core_config, + target_platform_capabilities=tpc) + + expected_history = self._build_expected_prog_info(core_config, resource_utilization, gptq_config) + assert callback_func.history == expected_history diff --git 
a/tests_pytest/pytorch_tests/e2e_tests/test_progress_visualization_pytorch.py b/tests_pytest/pytorch_tests/e2e_tests/test_progress_visualization_pytorch.py new file mode 100644 index 000000000..936a6dacd --- /dev/null +++ b/tests_pytest/pytorch_tests/e2e_tests/test_progress_visualization_pytorch.py @@ -0,0 +1,130 @@ +# Copyright 2026 Sony Semiconductor Solutions, Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +import pytest + +import model_compression_toolkit as mct + +import torch +from torch import nn + + +class E2ETestProgressInfoCallback: + def __init__(self): + self.history = [] + + def __call__(self, info): + self.history.append(info) + + +def representative_data_gen(): + yield [torch.randn(1, 3, 8, 8)] + + +class TestPytorchProgressVisualization: + + def _build_test_model(self): + + class Model(nn.Module): + def __init__(self): + super().__init__() + self.conv = nn.Conv2d(in_channels=3, out_channels=8, kernel_size=3) + self.bn = nn.BatchNorm2d(8) + self.relu = nn.ReLU() + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + x = self.relu(x) + return x + + return Model() + + def _build_expected_prog_info(self, core_config, resource_utilization, gptq_config): + + expected_str_list = ["MCT Graph Preprocessing", "Statistics Collection", "Calculate Quantization Parameters"] + + if resource_utilization is not None and 
resource_utilization.is_any_restricted(): + if core_config.mixed_precision_config is not None and core_config.mixed_precision_config.use_hessian_based_scores: + expected_str_list.append("Compute Hessian for Mixed Precision") + expected_str_list.append("Research Mixed Precision") + + if gptq_config is not None: + if gptq_config.hessian_weights_config is not None: + expected_str_list.append("Compute Hessian for GPTQ") + expected_str_list.append("Train with GPTQ") + + expected_str_list.append("MCT Graph Finalization") + + expected_components = [ + { + "completedComponents": component, + "totalComponents": len(expected_str_list), + "currentComponent": idx, + } + for idx, component in enumerate(expected_str_list, start=1) + ] + + return expected_components + + @pytest.mark.parametrize('is_enable_gptq_hessian', [False, True]) + @pytest.mark.parametrize('is_enable_mp_hessian', [False, True]) + @pytest.mark.parametrize('is_enable_mp', [False, True]) + @pytest.mark.parametrize('q_method', ['ptq', 'gptq']) + def test_pytorch_progress_visualization(self, q_method, is_enable_mp, is_enable_mp_hessian, is_enable_gptq_hessian): + if q_method == 'ptq' and is_enable_gptq_hessian: + pytest.skip("Skipping because the combination 'ptq' x 'gptq_hessian' is invalid.") + + float_model = self._build_test_model() + callback_func = E2ETestProgressInfoCallback() + + tpc = mct.get_target_platform_capabilities() + core_config = mct.core.CoreConfig(debug_config=mct.core.DebugConfig( + progress_info_callback=callback_func), + mixed_precision_config=mct.core.MixedPrecisionQuantizationConfig( + num_of_images=1, + use_hessian_based_scores=is_enable_mp_hessian)) + if is_enable_mp: + resource_utilization_data = mct.core.pytorch_resource_utilization_data(float_model, + representative_data_gen, + core_config=core_config, + target_platform_capabilities=tpc) + resource_utilization = mct.core.ResourceUtilization(weights_memory=resource_utilization_data.weights_memory * 0.9) + else: + resource_utilization 
= None + + if q_method == 'gptq': + gptq_config = mct.gptq.get_pytorch_gptq_config(n_epochs=3, + use_hessian_based_weights=is_enable_gptq_hessian, + use_hessian_sample_attention=is_enable_gptq_hessian) + else: + gptq_config = None + + + if q_method == 'ptq': + _, _ = mct.ptq.pytorch_post_training_quantization(in_module=float_model, + representative_data_gen=representative_data_gen, + target_resource_utilization=resource_utilization, + core_config=core_config, + target_platform_capabilities=tpc) + elif q_method == 'gptq': + _, _ = mct.gptq.pytorch_gradient_post_training_quantization(model=float_model, + representative_data_gen=representative_data_gen, + target_resource_utilization=resource_utilization, + gptq_config=gptq_config, + core_config=core_config, + target_platform_capabilities=tpc) + + expected_history = self._build_expected_prog_info(core_config, resource_utilization, gptq_config) + assert callback_func.history == expected_history