
Commit 87488e2

qbnn performance test
1 parent b780aad commit 87488e2

File tree

4 files changed, +13 -9 lines changed


bayesian_torch/layers/variational_layers/conv_variational.py

Lines changed: 3 additions & 3 deletions
@@ -48,7 +48,7 @@
 from torch.nn import Parameter
 from ..base_variational_layer import BaseVariationalLayer_, get_kernel_size
 import math
-from torch.quantization.observer import HistogramObserver, PerChannelMinMaxObserver
+from torch.quantization.observer import HistogramObserver, PerChannelMinMaxObserver, MinMaxObserver
 from torch.quantization.qconfig import QConfig

 __all__ = [
@@ -301,9 +301,9 @@ def __init__(self,

     def prepare(self):
         self.qint_quant = nn.ModuleList([torch.quantization.QuantStub(
-            QConfig(weight=HistogramObserver.with_args(dtype=torch.qint8), activation=HistogramObserver.with_args(dtype=torch.qint8))) for _ in range(5)])
+            QConfig(weight=MinMaxObserver.with_args(dtype=torch.qint8, qscheme=torch.per_tensor_symmetric), activation=MinMaxObserver.with_args(dtype=torch.qint8,qscheme=torch.per_tensor_symmetric))) for _ in range(5)])
         self.quint_quant = nn.ModuleList([torch.quantization.QuantStub(
-            QConfig(weight=HistogramObserver.with_args(dtype=torch.quint8), activation=HistogramObserver.with_args(dtype=torch.quint8))) for _ in range(2)])
+            QConfig(weight=MinMaxObserver.with_args(dtype=torch.quint8), activation=MinMaxObserver.with_args(dtype=torch.quint8))) for _ in range(2)])
         self.dequant = torch.quantization.DeQuantStub()
         self.quant_prepare=True
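Note (illustrative, not part of the diff): a minimal standalone sketch of the observer swap above. MinMaxObserver only tracks a running min/max, so calibration is cheaper than HistogramObserver's histogram search, and per_tensor_symmetric keeps the zero point of the qint8 path at 0; the names below are made up for the example.

import torch
from torch.quantization.observer import MinMaxObserver
from torch.quantization.qconfig import QConfig

# Symmetric per-tensor int8 config, analogous to the one used for the qint_quant stubs.
int8_qconfig = QConfig(
    activation=MinMaxObserver.with_args(dtype=torch.qint8,
                                        qscheme=torch.per_tensor_symmetric),
    weight=MinMaxObserver.with_args(dtype=torch.qint8,
                                    qscheme=torch.per_tensor_symmetric),
)
stub = torch.quantization.QuantStub(int8_qconfig)  # observes, then quantizes, its input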

bayesian_torch/layers/variational_layers/linear_variational.py

Lines changed: 5 additions & 3 deletions
@@ -47,6 +47,8 @@
 from torch.nn import Module, Parameter
 from ..base_variational_layer import BaseVariationalLayer_
 import math
+from torch.quantization.observer import HistogramObserver, PerChannelMinMaxObserver, MinMaxObserver
+from torch.quantization.qconfig import QConfig


 class LinearReparameterization(BaseVariationalLayer_):
@@ -120,9 +122,9 @@ def __init__(self,

     def prepare(self):
         self.qint_quant = nn.ModuleList([torch.quantization.QuantStub(
-            QConfig(weight=HistogramObserver.with_args(dtype=torch.qint8), activation=HistogramObserver.with_args(dtype=torch.qint8))) for _ in range(5)])
+            QConfig(weight=MinMaxObserver.with_args(dtype=torch.qint8, qscheme=torch.per_tensor_symmetric), activation=MinMaxObserver.with_args(dtype=torch.qint8,qscheme=torch.per_tensor_symmetric))) for _ in range(5)])
         self.quint_quant = nn.ModuleList([torch.quantization.QuantStub(
-            QConfig(weight=HistogramObserver.with_args(dtype=torch.quint8), activation=HistogramObserver.with_args(dtype=torch.quint8))) for _ in range(2)])
+            QConfig(weight=MinMaxObserver.with_args(dtype=torch.quint8), activation=MinMaxObserver.with_args(dtype=torch.quint8))) for _ in range(2)])
         self.dequant = torch.quantization.DeQuantStub()
         self.quant_prepare=True

@@ -157,7 +159,7 @@ def forward(self, input, return_kl=True):
             return_kl = False
         sigma_weight = torch.log1p(torch.exp(self.rho_weight))
         eps_weight = self.eps_weight.data.normal_()
-        tmp_result = sigma_weight * eps_kernel
+        tmp_result = sigma_weight * eps_weight
         weight = self.mu_weight + tmp_result

         if return_kl:
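Note (illustrative, not part of the diff): `eps_kernel` is the name used in the convolutional layers; in LinearReparameterization.forward it is undefined, so the old line raised a NameError. A minimal sketch of the reparameterization step the fixed line computes, with hypothetical shapes:

import torch

mu_weight = torch.zeros(4, 3)           # variational mean (hypothetical shape)
rho_weight = torch.full((4, 3), -3.0)   # unconstrained scale parameter
eps_weight = torch.randn(4, 3)          # eps ~ N(0, 1)

sigma_weight = torch.log1p(torch.exp(rho_weight))  # softplus keeps sigma positive
weight = mu_weight + sigma_weight * eps_weight     # w = mu + sigma * eps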

bayesian_torch/layers/variational_layers/quantize_linear_variational.py

Lines changed: 3 additions & 3 deletions
@@ -118,8 +118,8 @@ def quantize(self):
         delattr(self, "mu_weight")
         delattr(self, "rho_weight")

-        self.quantized_mu_bias = Parameter(self.get_quantized_tensor(self.mu_bias), requires_grad=False)
-        self.quantized_sigma_bias = Parameter(self.get_quantized_tensor(torch.log1p(torch.exp(self.rho_bias))), requires_grad=False)
+        self.quantized_mu_bias = self.mu_bias#Parameter(self.get_quantized_tensor(self.mu_bias), requires_grad=False)
+        self.quantized_sigma_bias = torch.log1p(torch.exp(self.rho_bias))#Parameter(self.get_quantized_tensor(torch.log1p(torch.exp(self.rho_bias))), requires_grad=False)
         delattr(self, "mu_bias")
         delattr(self, "rho_bias")
125125

@@ -171,7 +171,7 @@ def forward(self, input, enable_int8_compute=True, normal_scale=6/255, default_s

         if self.quant_dict is not None:
             eps_weight = torch.quantize_per_tensor(self.eps_weight.data.normal_(), self.quant_dict[0]['scale'], self.quant_dict[0]['zero_point'], torch.qint8) # Quantize a tensor from normal distribution. 99.7% values will lie within 3 standard deviations, so the original range is set as 6.
-            weight = torch.ops.quantized.mul(self.quantized_sigma_weight, eps_kernel, self.quant_dict[1]['scale'], self.quant_dict[1]['zero_point'])
+            weight = torch.ops.quantized.mul(self.quantized_sigma_weight, eps_weight, self.quant_dict[1]['scale'], self.quant_dict[1]['zero_point'])
             weight = torch.ops.quantized.add(weight, self.quantized_mu_weight, self.quant_dict[2]['scale'], self.quant_dict[2]['zero_point'])
             bias = None
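Note (illustrative, not part of the diff): the earlier hunk in this file keeps the bias mean and sigma in float rather than quantizing them, while the fixed line above samples the weight entirely with quantized kernels, w = mu + sigma * eps. A standalone sketch of that integer-only sampling; the scale and zero point are made-up calibration values, not the layer's quant_dict entries.

import torch

scale, zero_point = 6.0 / 255, 0   # hypothetical calibration results
sigma_q = torch.quantize_per_tensor(torch.rand(4, 3), scale, zero_point, torch.qint8)
mu_q = torch.quantize_per_tensor(torch.randn(4, 3), scale, zero_point, torch.qint8)
eps_q = torch.quantize_per_tensor(torch.randn(4, 3), scale, zero_point, torch.qint8)

w_q = torch.ops.quantized.mul(sigma_q, eps_q, scale, zero_point)  # sigma * eps
w_q = torch.ops.quantized.add(w_q, mu_q, scale, zero_point)       # + mu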
177177

bayesian_torch/models/bnn_to_qbnn.py

Lines changed: 2 additions & 0 deletions
@@ -200,6 +200,8 @@ def bnn_to_qbnn(m, fuse_conv_bn=False):
         if m._modules[name]._modules:
             if "Conv" in m._modules[name].__class__.__name__:
                 setattr(m, name, qbnn_conv_layer(m._modules[name]))
+            elif "Linear" in m._modules[name].__class__.__name__:
+                setattr(m, name, qbnn_linear_layer(m._modules[name]))
             else:
                 bnn_to_qbnn(m._modules[name], fuse_conv_bn=fuse_conv_bn)
         elif "Linear" in m._modules[name].__class__.__name__:
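Note (illustrative, not part of the diff): with this change, children whose class name contains "Linear" and that own submodules (for example a prepared LinearReparameterization holding its quant stubs) are converted directly instead of only being recursed into. A runnable toy mirror of the traversal, with nn.Identity standing in for qbnn_linear_layer:

import torch.nn as nn

class MyLinearBlock(nn.Module):
    # Hypothetical block: "Linear" in the class name, and it owns submodules.
    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(16, 8)

def convert(m):
    # Toy mirror of the bnn_to_qbnn traversal shown above.
    for name in list(m._modules):
        child = m._modules[name]
        if child._modules:                       # child has its own submodules
            if "Linear" in child.__class__.__name__:
                setattr(m, name, nn.Identity())  # stand-in for qbnn_linear_layer(child)
            else:
                convert(child)

model = nn.Sequential(MyLinearBlock())
convert(model)   # MyLinearBlock is swapped out in place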
