Commit b780aad: qbnn example
1 parent 9b0118f

File tree: 4 files changed, +72 −11 lines

bayesian_torch/examples/main_bayesian_imagenet_bnn2qbnn.py

Lines changed: 15 additions & 8 deletions
@@ -16,8 +16,8 @@
 import bayesian_torch.models.bayesian.resnet_variational_large as resnet
 import numpy as np
 from bayesian_torch.models.bnn_to_qbnn import bnn_to_qbnn
-# import bayesian_torch.models.bayesian.quantized_resnet_variational_large as qresnet
-import bayesian_torch.models.bayesian.quantized_resnet_flipout_large as qresnet
+import bayesian_torch.models.bayesian.quantized_resnet_variational_large as qresnet
+# import bayesian_torch.models.bayesian.quantized_resnet_flipout_large as qresnet

 torch.cuda.is_available = lambda : False
 os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

@@ -262,9 +262,16 @@ def main():
     model.load_state_dict(checkpoint["state_dict"])
     model.module = model.module.cpu()

-    bnn_to_qbnn(model, fuse_conv_bn=False)  # only replaces linear and conv layers
+    mp = bayesian_torch.quantization.prepare(model)
+    evaluate(args, mp, val_loader)  # calibration
+    qmodel = bayesian_torch.quantization.convert(mp)
+    evaluate(args, qmodel, val_loader)

-    model = model.cpu()
+    # bnn_to_qbnn(model, fuse_conv_bn=False)  # only replaces linear and conv layers
+
+    # model = model.cpu()

     # save weights
     # save_checkpoint(

@@ -278,16 +285,16 @@ def main():
     #         args.save_dir,
     #         'quantized_bayesian_q{}_imagenet.pth'.format(args.arch)))

-    qmodel = torch.nn.DataParallel(qresnet.__dict__['q'+args.arch](bias=False))  # set bias=True to make qconv have a bias
-    qmodel.module.quant_then_dequant(qmodel, fuse_conv_bn=False)
+    # qmodel = torch.nn.DataParallel(qresnet.__dict__['q'+args.arch](bias=False))  # set bias=True to make qconv have a bias
+    # qmodel.module.quant_then_dequant(qmodel, fuse_conv_bn=False)

     # load weights
     # checkpoint_file = args.save_dir + "/quantized_bayesian_q{}_imagenet.pth".format(args.arch)
     # checkpoint = torch.load(checkpoint_file, map_location=torch.device("cpu"))
     # qmodel.load_state_dict(checkpoint["state_dict"])

-    qmodel.load_state_dict(model.state_dict())
-    evaluate(args, qmodel, val_loader)
+    # qmodel.load_state_dict(model.state_dict())
+    # evaluate(args, qmodel, val_loader)

 if __name__ == "__main__":
     main()
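
The three added calls follow the standard post-training-quantization recipe: prepare attaches observers, a calibration pass populates them, and convert swaps in quantized layers. A minimal sketch of the assumed wiring (inferred from this commit, not a documented API contract; model, args, val_loader, and evaluate are the objects already defined in this example):

import bayesian_torch.quantization as quant

mp = quant.prepare(model)           # attach the per-layer QuantStubs added in linear_variational.py
evaluate(args, mp, val_loader)      # calibration: observers record tensor ranges
qmodel = quant.convert(mp)          # bnn_to_qbnn.py copies observed (scale, zero_point) pairs into quant_dict
evaluate(args, qmodel, val_loader)  # inference now takes the int8 path in quantize_linear_variational.py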

bayesian_torch/layers/variational_layers/linear_variational.py

Lines changed: 27 additions & 2 deletions
@@ -116,6 +116,15 @@ def __init__(self,
         self.register_buffer('eps_bias', None, persistent=False)

         self.init_parameters()
+        self.quant_prepare = False
+
+    def prepare(self):
+        self.qint_quant = nn.ModuleList([torch.quantization.QuantStub(
+            QConfig(weight=HistogramObserver.with_args(dtype=torch.qint8), activation=HistogramObserver.with_args(dtype=torch.qint8))) for _ in range(5)])
+        self.quint_quant = nn.ModuleList([torch.quantization.QuantStub(
+            QConfig(weight=HistogramObserver.with_args(dtype=torch.quint8), activation=HistogramObserver.with_args(dtype=torch.quint8))) for _ in range(2)])
+        self.dequant = torch.quantization.DeQuantStub()
+        self.quant_prepare = True

     def init_parameters(self):
         self.prior_weight_mu.fill_(self.prior_mean)

@@ -147,8 +156,10 @@ def forward(self, input, return_kl=True):
         if self.dnn_to_bnn_flag:
             return_kl = False
         sigma_weight = torch.log1p(torch.exp(self.rho_weight))
-        weight = self.mu_weight + \
-            (sigma_weight * self.eps_weight.data.normal_())
+        eps_weight = self.eps_weight.data.normal_()
+        tmp_result = sigma_weight * eps_weight
+        weight = self.mu_weight + tmp_result
+
         if return_kl:
             kl_weight = self.kl_div(self.mu_weight, sigma_weight,
                                     self.prior_weight_mu, self.prior_weight_sigma)

@@ -162,6 +173,20 @@ def forward(self, input, return_kl=True):
                                   self.prior_bias_sigma)

         out = F.linear(input, weight, bias)
+
+        if self.quant_prepare:
+            # quint8 quantstubs (unsigned activations)
+            input = self.quint_quant[0](input)  # input
+            out = self.quint_quant[1](out)  # output
+
+            # qint8 quantstubs (signed weight-side tensors)
+            sigma_weight = self.qint_quant[0](sigma_weight)  # weight
+            mu_weight = self.qint_quant[1](self.mu_weight)  # weight
+            eps_weight = self.qint_quant[2](eps_weight)  # random variable
+            tmp_result = self.qint_quant[3](tmp_result)  # multiply activation
+            weight = self.qint_quant[4](weight)  # add activation
+
         if return_kl:
             if self.mu_bias is not None:
                 kl = kl_weight + kl_bias
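
Each QuantStub here only observes the tensor routed through it; the actual numbers come from its HistogramObserver. A standalone sketch (not from the repo) of how one observer turns the tensors it sees during calibration into the (scale, zero_point) pair later read out in bnn_to_qbnn.py:

import torch
from torch.quantization import HistogramObserver

obs = HistogramObserver(dtype=torch.qint8)
for _ in range(8):            # calibration forward passes
    obs(torch.randn(32, 64))  # record the value distribution
scale, zero_point = obs.calculate_qparams()
print(scale.item(), zero_point.item())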

bayesian_torch/layers/variational_layers/quantize_linear_variational.py

Lines changed: 21 additions & 1 deletion
@@ -53,6 +53,7 @@ def __init__(self,
                          out_features)

         self.is_dequant = False
+        self.quant_dict = None

     def get_scale_and_zero_point(self, x, upper_bound=100, target_range=255):
         """ An implementation for symmetric quantization

@@ -168,7 +169,26 @@ def forward(self, input, enable_int8_compute=True, normal_scale=6/255, default_s
         if self.dnn_to_bnn_flag:
             return_kl = False

-        if not enable_int8_compute:  # Deprecated. Use this method for reducing model size only.
+        if self.quant_dict is not None:
+            # Quantize a tensor drawn from a normal distribution; 99.7% of values lie
+            # within 3 standard deviations, so the original range is taken as 6.
+            eps_weight = torch.quantize_per_tensor(self.eps_weight.data.normal_(), self.quant_dict[0]['scale'], self.quant_dict[0]['zero_point'], torch.qint8)
+            weight = torch.ops.quantized.mul(self.quantized_sigma_weight, eps_weight, self.quant_dict[1]['scale'], self.quant_dict[1]['zero_point'])
+            weight = torch.ops.quantized.add(weight, self.quantized_mu_weight, self.quant_dict[2]['scale'], self.quant_dict[2]['zero_point'])
+            bias = None
+
+            ## DO NOT QUANTIZE BIAS!!!
+            if self.bias:
+                if self.quantized_sigma_bias is None:  # the case where bias comes from bn fusion
+                    bias = self.quantized_mu_bias
+                else:  # original case
+                    bias = self.quantized_mu_bias + (self.quantized_sigma_bias * self.eps_bias.data.normal_())
+
+            if input.dtype != torch.quint8:  # check whether input has already been quantized
+                input = torch.quantize_per_tensor(input, self.quant_dict[3]['scale'], self.quant_dict[3]['zero_point'], torch.quint8)
+
+            out = torch.nn.quantized.functional.linear(input, weight, bias, scale=self.quant_dict[4]['scale'], zero_point=self.quant_dict[4]['zero_point'])  # input: quint8, weight: qint8, bias: fp32
+            out = out.dequantize()
+
+        elif not enable_int8_compute:  # Deprecated. Use this path for reducing model size only.
             if not self.is_dequant:
                 self.dequantize()
                 self.is_dequant = True
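
The new branch samples the weight entirely in int8: W = mu + sigma * eps, composed from quantized mul and add with the calibrated output scales. A standalone sketch with placeholder scales and zero points (made-up values, not from calibration):

import torch

mu    = torch.quantize_per_tensor(torch.randn(4, 4), 0.05, 0, torch.qint8)
sigma = torch.quantize_per_tensor(torch.rand(4, 4) * 0.1, 0.01, 0, torch.qint8)
eps   = torch.quantize_per_tensor(torch.randn(4, 4), 6 / 255, 0, torch.qint8)

tmp    = torch.ops.quantized.mul(sigma, eps, 0.02, 0)  # sigma * eps
weight = torch.ops.quantized.add(tmp, mu, 0.05, 0)     # mu + sigma * eps
print(weight.dequantize())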

bayesian_torch/models/bnn_to_qbnn.py

Lines changed: 9 additions & 0 deletions
@@ -101,6 +101,15 @@ def qbnn_linear_layer(d):
         out_features=d.out_features,
     )
     qbnn_layer.__dict__.update(d.__dict__)
+
+    if d.quant_prepare:
+        qbnn_layer.quant_dict = []
+        for qstub in d.qint_quant:
+            qbnn_layer.quant_dict.append({'scale': qstub.scale.item(), 'zero_point': qstub.zero_point.item()})
+        qbnn_layer.quant_dict = qbnn_layer.quant_dict[2:]
+        for qstub in d.quint_quant:
+            qbnn_layer.quant_dict.append({'scale': qstub.scale.item(), 'zero_point': qstub.zero_point.item()})
+
     qbnn_layer.quantize()
     if d.dnn_to_bnn_flag:
         qbnn_layer.dnn_to_bnn_flag = True
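
The [2:] slice drops the first two qint entries (the sigma_weight and mu_weight observers), presumably because those tensors are quantized separately in qbnn_layer.quantize(); the five entries that remain line up with the indices the quantized forward pass reads. An illustration only, with made-up values:

quant_dict = [
    {'scale': 6 / 255, 'zero_point': 0},    # [0] eps_weight        (qint8)
    {'scale': 0.02,    'zero_point': 0},    # [1] sigma * eps (mul) (qint8)
    {'scale': 0.05,    'zero_point': 0},    # [2] mu + sigma*eps    (qint8)
    {'scale': 0.1,     'zero_point': 128},  # [3] input             (quint8)
    {'scale': 0.1,     'zero_point': 128},  # [4] output            (quint8)
]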
