Commit 9b5a9dc: quantization module prototype

1 parent b4ce3f5, commit 9b5a9dc
8 files changed: +272 −13 lines

bayesian_torch/__init__.py

Lines changed: 1 addition & 0 deletions

@@ -0,0 +1 @@
+from bayesian_torch import quantization as quantization

Lines changed: 2 additions & 1 deletion

@@ -1,2 +1,3 @@
 ## bayesian_torch.quantization.prepare
-## bayesian_torch.quantization.convert
+## bayesian_torch.quantization.convert
+from .quantize import *
Lines changed: 161 additions & 7 deletions

@@ -1,9 +1,163 @@
-"""
-define prepare and convert function
-"""
+# Copyright (C) 2021 Intel Labs
+#
+# BSD-3-Clause License
+#
+# Redistribution and use in source and binary forms, with or without modification,
+# are permitted provided that the following conditions are met:
+# 1. Redistributions of source code must retain the above copyright notice,
+#    this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+#    this list of conditions and the following disclaimer in the documentation
+#    and/or other materials provided with the distribution.
+# 3. Neither the name of the copyright holder nor the names of its contributors
+#    may be used to endorse or promote products derived from this software
+#    without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
+# BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
+# OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+# OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+# OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# Define prepare and convert functions
+#
 
-def prepare():
-    return
+import torch
+import torch.nn as nn
+from bayesian_torch.models.bayesian.resnet_variational_large import (
+    BasicBlock,
+    Bottleneck,
+    ResNet,
+)
+from typing import Any, List, Optional, Type, Union
+from torch import Tensor
+from bayesian_torch.models.bnn_to_qbnn import bnn_to_qbnn
+# import copy
 
-def convert():
-    return
+__all__ = [
+    "prepare",
+    "convert",
+]
+
+
+class QuantizableBasicBlock(BasicBlock):
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        super().__init__(*args, **kwargs)
+        # FloatFunctional so the residual add + ReLU can run on quantized tensors
+        self.add_relu = torch.nn.quantized.FloatFunctional()
+
+    def forward(self, x: Tensor) -> Tensor:
+        identity = x
+
+        out = self.conv1(x)
+        out = self.bn1(out)
+        out = self.relu(out)
+
+        out = self.conv2(out)
+        out = self.bn2(out)
+
+        if self.downsample is not None:
+            identity = self.downsample(x)
+
+        out = self.add_relu.add_relu(out, identity)
+
+        return out
+
+
+class QuantizableBottleneck(Bottleneck):
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        super().__init__(*args, **kwargs)
+        self.skip_add_relu = nn.quantized.FloatFunctional()
+        self.relu1 = nn.ReLU(inplace=False)
+        self.relu2 = nn.ReLU(inplace=False)
+
+    def forward(self, x: Tensor) -> Tensor:
+        identity = x
+        out = self.conv1(x)
+        out = self.bn1(out)
+        out = self.relu1(out)
+        out = self.conv2(out)
+        out = self.bn2(out)
+        out = self.relu2(out)
+
+        out = self.conv3(out)
+        out = self.bn3(out)
+
+        if self.downsample is not None:
+            identity = self.downsample(x)
+        out = self.skip_add_relu.add_relu(out, identity)
+
+        return out
+
+
+class QuantizableResNet(ResNet):
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        super().__init__(*args, **kwargs)
+
+        self.quant = torch.ao.quantization.QuantStub()
+        self.dequant = torch.ao.quantization.DeQuantStub()
+
+    def forward(self, x: Tensor) -> Tensor:
+        x = self.quant(x)
+
+        x = self.conv1(x)
+        x = self.bn1(x)
+        x = self.relu(x)
+        x = self.maxpool(x)
+
+        for layer in self.layer1:
+            x = layer(x)
+
+        for layer in self.layer2:
+            x = layer(x)
+
+        for layer in self.layer3:
+            x = layer(x)
+
+        for layer in self.layer4:
+            x = layer(x)
+
+        x = self.avgpool(x)
+        x = torch.flatten(x, 1)
+        x = self.fc(x)
+
+        # x = self.dequant(x)
+        return x
+
+
+def enable_prepare(m):
+    for name, value in list(m._modules.items()):
+        if m._modules[name]._modules:
+            enable_prepare(m._modules[name])
+        elif "Reparameterization" in m._modules[name].__class__.__name__ \
+                or "Flipout" in m._modules[name].__class__.__name__:
+            prepare = getattr(m._modules[name], "prepare", None)
+            if callable(prepare):
+                m._modules[name].prepare()
+                m._modules[name].dnn_to_bnn_flag = True
+
+
+def prepare(model):
+    """
+    1. construct a quantizable model
+    2. traverse the model to enable the prepare function in each Bayesian layer
+    3. run torch.quantization.prepare()
+    """
+    # NOTE: hardcoded to the ResNet-50 layout (Bottleneck, [3, 4, 6, 3])
+    qmodel = QuantizableResNet(QuantizableBottleneck, [3, 4, 6, 3])
+    qmodel.load_state_dict(model.state_dict())
+    qmodel.eval()
+    enable_prepare(qmodel)
+    qmodel.qconfig = torch.quantization.get_default_qconfig("fbgemm")
+    qmodel = torch.quantization.prepare(qmodel)
+
+    return qmodel
+
+
+def convert(model):
+    qmodel = torch.quantization.convert(model)  # torch layers
+    bnn_to_qbnn(qmodel)  # Bayesian layers
+    return qmodel
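
For context on the traversal above: enable_prepare duck-types on a per-layer prepare() hook, keying only on class names that contain "Reparameterization" or "Flipout". A minimal sketch of that behavior on a stand-in module; ToyReparameterization is a hypothetical class, named so its suffix matches, and is not part of bayesian-torch:

import torch.nn as nn
from bayesian_torch.ao.quantization.quantize import enable_prepare

class ToyReparameterization(nn.Module):   # hypothetical leaf layer; the class-name
    def forward(self, x):                 # suffix is all enable_prepare() keys on
        return x

    def prepare(self):                    # the duck-typed hook enable_prepare() calls
        self.prepared = True

model = nn.Sequential(nn.Sequential(ToyReparameterization()), nn.ReLU())
enable_prepare(model)                     # recurses into the nested Sequential
print(model[0][0].prepared)               # True: the hook was found and invoked
print(model[0][0].dnn_to_bnn_flag)        # True: the layer's forward() skips returning KL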
Lines changed: 34 additions & 0 deletions

@@ -0,0 +1,34 @@
+# import torch
+# import bayesian_torch
+# from bayesian_torch.ao.quantization import prepare, convert
+# import bayesian_torch.models.bayesian.resnet_variational_large as resnet
+# from bayesian_torch.models.bnn_to_qbnn import bnn_to_qbnn
+
+# model = resnet.__dict__['resnet50']()
+
+# input = torch.randn(1, 3, 224, 224)
+# mp = prepare(model)
+# mp(input)  # haven't replaced the batchnorm layer
+# qmodel = torch.quantization.convert(mp)
+# bnn_to_qbnn(qmodel)
+
+
+import torch
+import bayesian_torch
+import bayesian_torch.models.bayesian.resnet_variational_large as resnet
+
+m = resnet.__dict__['resnet50']()
+# alternative way to construct a bnn model:
+# from bayesian_torch.models.dnn_to_bnn import dnn_to_bnn
+# m = torchvision.models.resnet50(weights="IMAGENET1K_V1")
+# dnn_to_bnn(m)
+
+
+mp = bayesian_torch.quantization.prepare(m)
+input = torch.randn(1, 3, 224, 224)
+mp(input)  # calibration
+mq = bayesian_torch.quantization.convert(mp)
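
One caveat on the calibration step above: mp(input) pushes a single random batch through the observer-instrumented model, which is enough to smoke-test the flow but not to collect useful activation ranges. In practice one would loop over representative data. A small sketch, where calib_loader is a hypothetical DataLoader of real inputs:

import torch

mp.eval()
with torch.no_grad():
    for images, _ in calib_loader:  # hypothetical DataLoader of representative inputs
        mp(images)                  # observers accumulate per-tensor min/max statistics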

bayesian_torch/layers/variational_layers/quantize_conv_variational.py

Lines changed: 67 additions & 3 deletions

@@ -93,6 +93,7 @@ def __init__(self,
         self.bn_eps = None
 
         self.is_dequant = False
+        self.quant_dict = None
 
     def get_scale_and_zero_point(self, x, upper_bound=100, target_range=255):
         """ An implementation for symmetric quantization
@@ -237,7 +238,26 @@ def forward(self, input, enable_int8_compute=True, normal_scale=6/255, default_s
         if self.dnn_to_bnn_flag:
             return_kl = False
 
-        if not enable_int8_compute:  # Deprecated. Use this method for reducing model size only.
+        if self.quant_dict is not None:
+            eps_kernel = torch.quantize_per_tensor(self.eps_kernel.data.normal_(), self.quant_dict[0]['scale'], self.quant_dict[0]['zero_point'], torch.qint8)  # Quantize a tensor from normal distribution. 99.7% values will lie within 3 standard deviations, so the original range is set as 6.
+            weight = torch.ops.quantized.mul(self.quantized_sigma_weight, eps_kernel, self.quant_dict[1]['scale'], self.quant_dict[1]['zero_point'])
+            weight = torch.ops.quantized.add(weight, self.quantized_mu_weight, self.quant_dict[2]['scale'], self.quant_dict[2]['zero_point'])
+            bias = None
+
+            ## DO NOT QUANTIZE BIAS!!!
+            if self.bias:
+                if self.quantized_sigma_bias is None:  # the case that bias comes from bn fusion
+                    bias = self.quantized_mu_bias
+                else:  # original case
+                    bias = self.quantized_mu_bias + (self.quantized_sigma_bias * self.eps_bias.data.normal_())
+
+            if input.dtype != torch.quint8:  # check if input has been quantized
+                input = torch.quantize_per_tensor(input, self.quant_dict[3]['scale'], self.quant_dict[3]['zero_point'], torch.quint8)  # scale=0.1 by grid search; zero_point=128 for uint8 format
+
+            out = torch.nn.quantized.functional.conv1d(input, weight, bias, self.stride, self.padding,
+                                                       self.dilation, self.groups, scale=self.quant_dict[4]['scale'], zero_point=self.quant_dict[4]['zero_point'])  # input: quint8, weight: qint8, bias: fp32
+
+        elif not enable_int8_compute:  # Deprecated. Use this method for reducing model size only.
             if not self.is_dequant:
                 self.dequantize()
                 self.is_dequant = True
@@ -323,6 +343,7 @@ def __init__(self,
         self.bn_eps = None
 
         self.is_dequant = False
+        self.quant_dict = None
 
     def get_scale_and_zero_point(self, x, upper_bound=100, target_range=255):
         """ An implementation for symmetric quantization
@@ -419,6 +440,10 @@ def quantize(self):
         delattr(self, "bn_running_var")
         delattr(self, "bn_eps")
 
+        delattr(self, "qint_quant")
+        delattr(self, "quint_quant")
+        delattr(self, "dequant")
+
     def dequantize(self):  # Deprecated. Only for forward mode #1.
         self.mu_kernel = self.get_dequantized_tensor(self.quantized_mu_weight)
         self.sigma_weight = self.get_dequantized_tensor(self.quantized_sigma_weight)
@@ -466,7 +491,26 @@ def forward(self, input, enable_int8_compute=True, normal_scale=6/255, default_s
         if self.dnn_to_bnn_flag:
             return_kl = False
 
-        if not enable_int8_compute:  # Deprecated. Use this method for reducing model size only.
+        if self.quant_dict is not None:
+            eps_kernel = torch.quantize_per_tensor(self.eps_kernel.data.normal_(), self.quant_dict[0]['scale'], self.quant_dict[0]['zero_point'], torch.qint8)  # Quantize a tensor from normal distribution. 99.7% values will lie within 3 standard deviations, so the original range is set as 6.
+            weight = torch.ops.quantized.mul(self.quantized_sigma_weight, eps_kernel, self.quant_dict[1]['scale'], self.quant_dict[1]['zero_point'])
+            weight = torch.ops.quantized.add(weight, self.quantized_mu_weight, self.quant_dict[2]['scale'], self.quant_dict[2]['zero_point'])
+            bias = None
+
+            ## DO NOT QUANTIZE BIAS!!!
+            if self.bias:
+                if self.quantized_sigma_bias is None:  # the case that bias comes from bn fusion
+                    bias = self.quantized_mu_bias
+                else:  # original case
+                    bias = self.quantized_mu_bias + (self.quantized_sigma_bias * self.eps_bias.data.normal_())
+
+            if input.dtype != torch.quint8:  # check if input has been quantized
+                input = torch.quantize_per_tensor(input, self.quant_dict[3]['scale'], self.quant_dict[3]['zero_point'], torch.quint8)  # scale=0.1 by grid search; zero_point=128 for uint8 format
+
+            out = torch.nn.quantized.functional.conv2d(input, weight, bias, self.stride, self.padding,
+                                                       self.dilation, self.groups, scale=self.quant_dict[4]['scale'], zero_point=self.quant_dict[4]['zero_point'])  # input: quint8, weight: qint8, bias: fp32
+
+        elif not enable_int8_compute:  # Deprecated. Use this method for reducing model size only.
             if not self.is_dequant:
                 self.dequantize()
                 self.is_dequant = True
@@ -550,6 +594,7 @@ def __init__(self,
         self.bn_eps = None
 
         self.is_dequant = False
+        self.quant_dict = None
 
     def get_scale_and_zero_point(self, x, upper_bound=100, target_range=255):
         """ An implementation for symmetric quantization
@@ -693,7 +738,26 @@ def forward(self, input, enable_int8_compute=True, normal_scale=6/255, default_s
         if self.dnn_to_bnn_flag:
             return_kl = False
 
-        if not enable_int8_compute:  # Deprecated. Use this method for reducing model size only.
+        if self.quant_dict is not None:
+            eps_kernel = torch.quantize_per_tensor(self.eps_kernel.data.normal_(), self.quant_dict[0]['scale'], self.quant_dict[0]['zero_point'], torch.qint8)  # Quantize a tensor from normal distribution. 99.7% values will lie within 3 standard deviations, so the original range is set as 6.
+            weight = torch.ops.quantized.mul(self.quantized_sigma_weight, eps_kernel, self.quant_dict[1]['scale'], self.quant_dict[1]['zero_point'])
+            weight = torch.ops.quantized.add(weight, self.quantized_mu_weight, self.quant_dict[2]['scale'], self.quant_dict[2]['zero_point'])
+            bias = None
+
+            ## DO NOT QUANTIZE BIAS!!!
+            if self.bias:
+                if self.quantized_sigma_bias is None:  # the case that bias comes from bn fusion
+                    bias = self.quantized_mu_bias
+                else:  # original case
+                    bias = self.quantized_mu_bias + (self.quantized_sigma_bias * self.eps_bias.data.normal_())
+
+            if input.dtype != torch.quint8:  # check if input has been quantized
+                input = torch.quantize_per_tensor(input, self.quant_dict[3]['scale'], self.quant_dict[3]['zero_point'], torch.quint8)  # scale=0.1 by grid search; zero_point=128 for uint8 format
+
+            out = torch.nn.quantized.functional.conv3d(input, weight, bias, self.stride, self.padding,
+                                                       self.dilation, self.groups, scale=self.quant_dict[4]['scale'], zero_point=self.quant_dict[4]['zero_point'])  # input: quint8, weight: qint8, bias: fp32
+
+        elif not enable_int8_compute:  # Deprecated. Use this method for reducing model size only.
            if not self.is_dequant:
                 self.dequantize()
                 self.is_dequant = True
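
The quant_dict branch added above carries out the reparameterization trick, weight = mu + sigma * eps, entirely in the quantized domain: a fresh standard-normal eps is quantized to qint8 (the comment's 3-sigma range of 6 gives scale = 6/255 ≈ 0.0235), multiplied into the quantized sigma, added to the quantized mu, and fed to a quantized conv; only the bias stays in fp32. A self-contained sketch of that sampling step, with made-up scales and zero points standing in for the calibrated quant_dict entries:

import torch

# Placeholder quantized posterior parameters; real values come from calibration.
mu = torch.quantize_per_tensor(torch.randn(8, 3, 3, 3), 0.02, 0, torch.qint8)
sigma = torch.quantize_per_tensor(torch.rand(8, 3, 3, 3) * 0.1, 0.001, 0, torch.qint8)

# Sample eps ~ N(0, 1); 99.7% of draws fall in [-3, 3], hence scale = 6 / 255.
eps = torch.quantize_per_tensor(torch.randn(8, 3, 3, 3), 6 / 255, 0, torch.qint8)

# weight = mu + sigma * eps, via quantized mul/add with explicit output qparams.
w = torch.ops.quantized.mul(sigma, eps, 0.0005, 0)  # placeholder output scale/zp
w = torch.ops.quantized.add(w, mu, 0.02, 0)         # placeholder output scale/zp
print(w.dtype, w.shape)                             # torch.qint8, (8, 3, 3, 3)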

bayesian_torch/models/bayesian/resnet_variational_large.py

Lines changed: 2 additions & 2 deletions

@@ -14,7 +14,7 @@
 from bayesian_torch.layers import BatchNorm2dLayer
 
 __all__ = [
-    'ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152'
+    'ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152', 'BasicBlock', 'Bottleneck'
 ]
 
 prior_mu = 0.0
@@ -200,7 +200,7 @@ def _make_layer(self, block, planes, blocks, stride=1):
                     posterior_mu_init=posterior_mu_init,
                     posterior_rho_init=posterior_rho_init,
                     bias=False),
-                BatchNorm2dLayer(planes * block.expansion),
+                nn.BatchNorm2d(planes * block.expansion),
             )
 
         layers = []
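
A note on the BatchNorm2dLayer to nn.BatchNorm2d swap above: eager-mode torch.quantization.convert() only rewrites module classes found in its default mapping, so a custom BN class would be silently left in fp32. A quick check, assuming a recent PyTorch with the torch.ao.quantization namespace:

import torch.nn as nn
from torch.ao.quantization import get_default_static_quant_module_mappings

mappings = get_default_static_quant_module_mappings()
print(nn.BatchNorm2d in mappings)  # True: convert() has a quantized counterpart for it
# A project-specific BatchNorm2dLayer is absent from this dict and stays unconverted.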
Lines changed: 3 additions & 0 deletions

@@ -0,0 +1,3 @@
+from .quantize import *
+
+# __all__ = ['prepare', 'convert']
Lines changed: 2 additions & 0 deletions

@@ -0,0 +1,2 @@
+from bayesian_torch.ao.quantization.quantize import prepare
+from bayesian_torch.ao.quantization.quantize import convert
