openvinotoolkit · vinnamkim · Jul 28, 2022 · Aug 1, 2022 · Aug 24, 2022 · Oct 4, 2022
@@ -0,0 +1,40 @@
+# FracBits mixed-precision quantization algorithm
+
+This directory provides sample configurations of the FracBits mixed-precision quantization algorithm for image classification tasks.
+
+## Prerequisites
+
+Please follow the [installation guide](../../../torch/classification/README.md#installation) and the [dataset preparation guide](../../../torch/classification/README.md#dataset-preparation) of the NNCF PyTorch classification examples.
+
+## Compress FP32 model with FracBits
+
+You can run the FracBits mixed-precision quantization algorithm with the pre-defined configuration file.
+
+```bash
+cd examples/experimental/torch/classification
+python fracbits.py -m train -c <config_path> -j <num_workers> --data <dataset_path> --log-dir <path_for_logging>
+```
+
+The following describes each argument.
+
+- `-c`: FracBits configuration file path. The provided configurations are located in `examples/experimental/torch/classification/fracbits_configs`.
+- `-j`: The number of PyTorch dataloader workers.
+- `--data`: Directory path of the dataset.
+- `--log-dir`: Directory path to save log files, tensorboard logs, and model checkpoints.
+
+We provide configurations for three model architectures: `inception_v3`, `mobilenet_v2`, and `resnet50`. All of them have a configuration for the ImageNet dataset; `mobilenet_v2` additionally has a configuration for the CIFAR100 dataset.
+
+## Results for FracBits
+
+|    Model     | Compression algorithm | Dataset  | Accuracy (Drop) % |                                                                       NNCF config file                                                                        | Compression rate |
+| :----------: | :-------------------: | :------: | :---------------: | :-----------------------------------------------------------------------------------------------------------------------------------------------------------: | :--------------: |
+| MobileNet-V2 |       FracBits        | CIFAR100 |   67.26 (0.45)    | [mobilenet_v2_cifar100_mixed_int_fracbits_msize.json](./fracbits_configs/mobilenet_v2_cifar100_mixed_int_fracbits_msize.json) |       1.5        |
+| Inception-V3 |       FracBits        | ImageNet |   78.16 (-0.82)   | [inception_v3_imagenet_mixed_int_fracbits_msize.json](./fracbits_configs/inception_v3_imagenet_mixed_int_fracbits_msize.json) |       1.51       |
+| MobileNet-V2 |       FracBits        | ImageNet |   71.19 (0.68)    | [mobilenet_v2_imagenet_mixed_int_fracbits_msize.json](./fracbits_configs/mobilenet_v2_imagenet_mixed_int_fracbits_msize.json) |       1.53       |
+|  ResNet-50   |       FracBits        | ImageNet |   76.12 (0.04)    |     [resnet50_imagenet_mixed_int_fracbits_msize.json](./fracbits_configs/resnet50_imagenet_mixed_int_fracbits_msize.json)     |       1.54       |
+
+- We used a machine with 8 NVIDIA V100 GPUs to obtain all results except for the MobileNet-V2 CIFAR100 experiment.
+- Model accuracy is obtained by averaging over 5 repeated runs.
+- Absolute accuracy drop is compared to FP32 model accuracy reported in [Results for quantization](../../../torch/classification/README.md#results-for-quantization).
+- Compression rate is the ratio of the initial model size to the compressed model size. The initial model state is INT8-quantized, so compression rate = 1.5 means that the model size is reduced to 2/3 of the INT8 model size.
+- Model size is the total number of bits in the model weights. It is computed by $\sum_i \textrm{\# of params}_i \times \textrm{bitwidth}_i$, where $\textrm{\# of params}_i$ is the number of parameters of the $i$-th layer and $\textrm{bitwidth}_i$ is the bit-width of the $i$-th layer.
@@ -0,0 +1,22 @@
+"""
+ Copyright (c) 2022 Intel Corporation
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+      http://www.apache.org/licenses/LICENSE-2.0
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+# pylint: disable=unused-import
+
+import sys
+
+from examples.torch.classification.main import main
+# NOTE(review): this name is never referenced below; it is presumably imported only so that the
+# builder module is loaded and its 'fracbits_quantization' algorithm registration takes effect
+# before main() runs -- confirm before removing the import.
+from nncf.experimental.torch.fracbits.builder import FracBitsQuantizationBuilder
+
+# Forward the command-line arguments (without the program name) to the classification sample's main().
+if __name__ == '__main__':
+    main(sys.argv[1:])
@@ -0,0 +1,46 @@
+{
+    "model": "inception_v3",
+    "pretrained": true,
+    "input_info": {
+      "sample_size": [2, 3, 299, 299]
+    },
+    "num_classes": 1000,
+    "batch_size" : 512,
+    "epochs": 1,
+    "multiprocessing_distributed" : true,
+    "find_unused_parameters": true,
+    "optimizer": {
+        "type": "SGD",
+        "base_lr": 1e-3,
+        "schedule_type": "multistep",
+        "steps": [
+            1
+        ]
+    },
+    "compression": {
+        "algorithm": "fracbits_quantization",
+        "overflow_fix": "disable",
+        "initializer": {
+            "range": {
+                "num_init_samples": 160
+            }
+        },
+        "ignored_scopes": [
+            "Inception3/__add___0",
+            "Inception3/__add___1",
+            "Inception3/__add___2",
+            "Inception3/__mul___0",
+            "Inception3/__mul___1",
+            "Inception3/__mul___2",
+            "Inception3/cat_0"
+        ],
+        "freeze_epoch": -1,
+        "loss": {
+            "type": "model_size",
+            "compression_rate": 1.5,
+            "criteria": "L1",
+            "flip_loss": false,
+            "alpha": 40.0
+        }
+    }
+}
@@ -0,0 +1,33 @@
+{
+    "model": "mobilenet_v2_32x32",
+    "pretrained": false,
+    "input_info": {
+      "sample_size": [2, 3, 32, 32]
+    },
+    "num_classes": 100,
+    "batch_size": 256,
+    "optimizer": {
+        "type": "SGD",
+        "base_lr": 1e-3
+    },
+    "compression": {
+        "algorithm": "fracbits_quantization",
+        "overflow_fix": "disable",
+        "activations": {
+            "mode": "asymmetric"
+        },
+        "weights": {
+            "mode": "asymmetric"
+        },
+        "freeze_epoch": 4,
+        "loss": {
+            "type": "model_size",
+            "compression_rate": 1.5,
+            "criteria": "L1",
+            "flip_loss": false,
+            "alpha": 40.0
+        }
+    },
+    "epochs": 5,
+    "dataset": "CIFAR100"
+}
@@ -0,0 +1,37 @@
+{
+    "model": "mobilenet_v2",
+    "pretrained": true,
+    "input_info": {
+      "sample_size": [2, 3, 224, 224]
+    },
+    "num_classes": 1000,
+    "batch_size" : 1024,
+    "epochs": 5,
+    "multiprocessing_distributed": true,
+    "find_unused_parameters": true,
+    "optimizer": {
+        "type": "SGD",
+        "base_lr": 1e-3,
+        "schedule_type": "multistep",
+        "steps": [
+            5
+        ]
+    },
+    "compression": {
+        "algorithm": "fracbits_quantization",
+        "overflow_fix": "disable",
+        "initializer": {
+            "range": {
+                "num_init_samples": 2560
+            }
+        },
+        "freeze_epoch": 4,
+        "loss": {
+            "type": "model_size",
+            "compression_rate": 1.5,
+            "criteria": "L1",
+            "flip_loss": false,
+            "alpha": 40.0
+        }
+    }
+}
@@ -0,0 +1,34 @@
+{
+    "model": "resnet50",
+    "pretrained": true,
+
+    "input_info": {
+      "sample_size": [1, 3, 224, 224]
+    },
+    "num_classes": 1000,
+    "batch_size": 512,
+    "epochs": 1,
+    "multiprocessing_distributed": true,
+    "find_unused_parameters": true,
+    "optimizer": {
+        "type": "SGD",
+        "base_lr": 1e-3
+    },
+    "compression": {
+        "algorithm": "fracbits_quantization",
+        "overflow_fix": "disable",
+        "initializer": {
+            "range": {
+                "num_init_samples": 850
+            }
+        },
+        "freeze_epoch": -1,
+        "loss": {
+            "type": "model_size",
+            "compression_rate": 1.5,
+            "criteria": "L1",
+            "flip_loss": false,
+            "alpha": 40.0
+        }
+    }
+}
@@ -183,6 +183,15 @@ def get_common_argument_parser():
         help="Disable compression",
         action="store_true",
     )
+
+    parser.add_argument(
+        "--find-unused-parameters",
+        help="For distributed execution mode, if it is true, "
+        "Parameters that don't receive gradients as part of this graph "
+        "are preemptively marked as being ready to be reduced. "
+        "FracBits should turn on this option if freeze_epoch > 0.",
+        action="store_true",
+    )
     return parser
 
 

@@ -73,13 +73,14 @@ def prepare_model_for_execution(model, config):
         # should always set the single device scope, otherwise,
         # DistributedDataParallel will use all available devices.
         torch.cuda.set_device(config.current_gpu)
-        model = torch.nn.parallel.distributed.DistributedDataParallel(model, device_ids=[config.current_gpu])
+        model = torch.nn.parallel.distributed.DistributedDataParallel(
+            model, device_ids=[config.current_gpu], find_unused_parameters=config.find_unused_parameters)
         model_without_dp = model.module
 
     if config.execution_mode == ExecutionMode.DISTRIBUTED:
         # DistributedDataParallel will divide and allocate batch_size to all
         # available GPUs if device_ids are not set
-        model = torch.nn.parallel.DistributedDataParallel(model)
+        model = torch.nn.parallel.DistributedDataParallel(model, find_unused_parameters=config.find_unused_parameters)
         model_without_dp = model.module
 
     if config.execution_mode == ExecutionMode.SINGLE_GPU:

@@ -8,3 +8,7 @@ returns==0.14
 opencv-python>=4.4.0.46
 torchvision==0.10.1  # should always match the torch version that is installed via NNCF's setup.py
 efficientnet_pytorch
+
+# Please see
+# https://stackoverflow.com/questions/70520120/attributeerror-module-setuptools-distutils-has-no-attribute-version
+setuptools==59.5.0
@@ -104,12 +104,13 @@ def register(self, algorithm_name: str, stats: Statistics):
             - quantization
             - filter_pruning
             - binarization
+            - fracbits_quantization
         :param stats: Statistics of the algorithm.
         """
 
         available_algorithms = [
             'magnitude_sparsity', 'rb_sparsity', 'const_sparsity',
-            'quantization', 'filter_pruning', 'binarization'
+            'quantization', 'filter_pruning', 'binarization', "fracbits_quantization"
         ]
         if algorithm_name not in available_algorithms:
             raise ValueError('Can not register statistics for the algorithm. '

@@ -290,11 +290,35 @@
     "additionalProperties": False
 }
 
+########################################################################################################################
+# FracBits Quantization
+########################################################################################################################
+FRACBITS_QUANTIZATION_ALGO_NAME_IN_CONFIG = 'fracbits_quantization'
+# Start from an independent deep copy of the regular quantization schema so that the
+# FracBits-specific overrides below do not modify QUANTIZATION_SCHEMA itself.
+FRACBITS_QUANTIZATION_SCHEMA = copy.deepcopy(QUANTIZATION_SCHEMA)
+# The "algorithm" field must match the FracBits algorithm name.
+FRACBITS_QUANTIZATION_SCHEMA['properties']['algorithm']['const'] = FRACBITS_QUANTIZATION_ALGO_NAME_IN_CONFIG
+# Additional FracBits-only property: "freeze_epoch".
+FRACBITS_QUANTIZATION_SCHEMA['properties']['freeze_epoch'] = with_attributes(
+    NUMBER, description="The number of epoch to freeze fractional bit widths to integers by rounding them.")
+# Additional FracBits-only property: "loss", an object describing the compression loss.
+# Unknown keys inside "loss" are rejected ("additionalProperties": False).
+FRACBITS_QUANTIZATION_SCHEMA['properties']['loss'] = {
+    "type": "object",
+    "properties": {
+        "type": with_attributes(STRING, description="Type of compression loss. Choose model_size or bitops."),
+        "compression_rate": with_attributes(NUMBER, description="Target compression rate"),
+        "criteria": with_attributes(STRING, description="Criteria to measure the distance between the target "
+                                    "compression rate and the current compression rate. Choose L1 or L2."),
+        "flip_loss": with_attributes(BOOLEAN, description="If true, we compute the compression loss by "
+                                     "|1 / target_compression_rate - (current_model_size / target_model_size)|, rather than "
+                                     "|target_compression_rate - (target_model_size / current_model_size)|."),
+        "alpha": with_attributes(NUMBER, description="Scale multiplier for the compression loss."),
+    },
+    "additionalProperties": False
+}
+
 ########################################################################################################################
 # All experimental schemas
 ########################################################################################################################
 
 EXPERIMENTAL_REF_VS_ALGO_SCHEMA = {
     EXPERIMENTAL_QUANTIZATION_ALGO_NAME_IN_CONFIG: EXPERIMENTAL_QUANTIZATION_SCHEMA,
-    BOOTSTRAP_NAS_ALGO_NAME_IN_CONFIG: BOOTSTRAP_NAS_SCHEMA
+    BOOTSTRAP_NAS_ALGO_NAME_IN_CONFIG: BOOTSTRAP_NAS_SCHEMA,
+    FRACBITS_QUANTIZATION_ALGO_NAME_IN_CONFIG: FRACBITS_QUANTIZATION_SCHEMA
 }
@@ -0,0 +1,49 @@
+"""
+ Copyright (c) 2022 Intel Corporation
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+      http://www.apache.org/licenses/LICENSE-2.0
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from nncf.experimental.torch.fracbits.controller import FracBitsQuantizationController
+from nncf.torch.algo_selector import PT_COMPRESSION_ALGORITHMS
+from nncf.torch.compression_method_api import PTCompressionAlgorithmController
+from nncf.torch.nncf_network import NNCFNetwork
+from nncf.torch.quantization.algo import QuantizationBuilder
+from nncf.torch.quantization.layers import PTQuantizerSetup
+from nncf.common.quantization.structs import QuantizationMode
+from nncf.experimental.torch.fracbits.quantizer import FracBitsQuantizationMode
+
+
+@PT_COMPRESSION_ALGORITHMS.register('fracbits_quantization')
+class FracBitsQuantizationBuilder(QuantizationBuilder):
+    """
+    Quantization builder registered under the 'fracbits_quantization' algorithm name.
+
+    It reuses the base QuantizationBuilder, switches every quantization point of the quantizer setup
+    to the corresponding FracBits quantization mode, and builds a FracBitsQuantizationController.
+    """
+
+    def _get_quantizer_setup(self, target_model: NNCFNetwork) -> PTQuantizerSetup:
+        """
+        Builds the base quantizer setup and remaps the mode of each quantization point to its
+        FracBits counterpart.
+
+        :param target_model: Model for which the quantizer setup is built.
+        :return: The quantizer setup whose quantization points use FracBitsQuantizationMode values.
+        :raises ValueError: If a quantization point has a mode other than
+            QuantizationMode.ASYMMETRIC or QuantizationMode.SYMMETRIC.
+        """
+        setup = super()._get_quantizer_setup(target_model)
+
+        for q_point in setup.quantization_points.values():
+            mode = q_point.qspec.mode
+            if mode == QuantizationMode.ASYMMETRIC:
+                q_point.qspec.mode = FracBitsQuantizationMode.ASYMMETRIC
+            elif mode == QuantizationMode.SYMMETRIC:
+                q_point.qspec.mode = FracBitsQuantizationMode.SYMMETRIC
+            else:
+                # Fail loudly rather than silently keeping a mode that has no FracBits equivalent.
+                raise ValueError(f"qspec.mode={mode} is unknown.")
+
+        return setup
+
+    def _build_controller(self, model: NNCFNetwork) -> PTCompressionAlgorithmController:
+        """
+        Creates the FracBits controller from the state collected by this builder.
+
+        :param model: Model passed to the controller.
+        :return: A FracBitsQuantizationController instance.
+        """
+        return FracBitsQuantizationController(model,
+                                              self.config,
+                                              self._debug_interface,
+                                              self._weight_quantizers,
+                                              self._non_weight_quantizers,
+                                              self._groups_of_adjacent_quantizers,
+                                              self._quantizers_input_shapes,
+                                              build_time_metric_info=self._build_time_metric_infos,
+                                              build_time_range_init_params=self._range_init_params)