Commit df52e51

chyomin06 authored and fracape committed
[feat] complete support of integer transposed conv2d
1 parent 67c9f41 commit df52e51

File tree

3 files changed: +97, -33 lines

compressai_vision/model_wrappers/detectron2.py

Lines changed: 52 additions & 4 deletions

@@ -104,6 +104,50 @@ def forward(self, x: torch.Tensor):
         return x
 
 
+class ConvTranspose2d(IntTransposedConv2d):
+    def __init__(self, *args, **kwargs) -> None:
+        """
+        Extra keyword arguments supported in addition to those in `torch.nn.Conv2d`:
+
+        Args:
+            norm (nn.Module, optional): a normalization layer
+            activation (callable(Tensor) -> Tensor): a callable activation function
+
+        It assumes that the norm layer is used before the activation.
+        """
+
+        norm = kwargs.pop("norm", None)
+        activation = kwargs.pop("activation", None)
+        super().__init__(*args, **kwargs)
+
+        self.norm = norm
+        self.activation = activation
+
+    def set_attributes(self, module):
+
+        if hasattr(module, "norm"):
+            self.norm = module.norm
+
+        if hasattr(module, "activation"):
+            self.activation = module.activation
+
+        if hasattr(module, "bias"):
+            self.bias = module.bias
+
+    def forward(self, x: torch.Tensor):
+        if not self.initified_weight_mode:
+            x = self.transposedconv2d(x)
+        else:
+            x = self.integer_transposeconv2d(x)
+
+        if self.norm is not None:
+            x = self.norm(x)
+        if self.activation is not None:
+            x = self.activation(x)
+
+        return x
+
+
 class Split_Points(Enum):
     def __str__(self):
         return str(self.value)
@@ -188,15 +232,15 @@ def size_divisibility(self):
 
     def replace_conv2d_modules(self, module):
        for child_name, child_module in module.named_children():
-            if type(child_module).__name__ in ["Conv2d", "TransposedConv2d"]:
+            if type(child_module).__name__ in ["Conv2d", "ConvTranspose2d"]:
                 if type(child_module).__name__ == "Conv2d":
                     int_module = Conv2d(**child_module.__dict__)
                     int_module.set_attributes(child_module)
                 else:
-                    int_module = IntTransposedConv2d(**child_module.__dict__)
+                    int_module = ConvTranspose2d(**child_module.__dict__)
                     int_module.set_attributes(child_module)
 
-                # Since regular list is used instead of ModuleList
+                # Since a regular list is used instead of ModuleList in Backbone
                 if "fpn_lateral" in child_name or "fpn_output" in child_name:
                     idx = re.findall(r"\d", child_name)
                     assert len(idx) == 1
@@ -211,12 +255,16 @@ def replace_conv2d_modules(self, module):
 
                 setattr(module, child_name, int_module)
             else:
+                # WATCH OUT: RECURSIVE FUNCTION CALLS
+                # The function could be rewritten to iterate explicitly over each module
+                # that contains Conv2d and transposed Conv2d layers, e.g.
+                # type(module).__name__ in ["FPN", "BasicStem", "BottleneckBlock", "StandardRPNHead", "MaskRCNNConvUpsampledHead"]
                 self.replace_conv2d_modules(child_module)
 
     @staticmethod
     def quantize_weights(model):
         for _, m in model.named_modules():
-            if type(m).__name__ == "Conv2d":
+            if type(m).__name__ in ["Conv2d", "ConvTranspose2d"]:
                 # print(f"Module name: {name} and type {type(m).__name__}")
                 m.quantize_weights()

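For orientation, the new ConvTranspose2d wrapper above is picked up by the same two-pass flow as the existing Conv2d path: replace_conv2d_modules() swaps the detectron2 layers for their integer-capable counterparts, and the static quantize_weights() then intifies their weights. The sketch below mirrors that pattern with plain torch.nn modules; the helper names and the toy model are hypothetical stand-ins, not part of this commit.

import torch.nn as nn


class IntReadyConvTranspose2d(nn.ConvTranspose2d):
    """Hypothetical stand-in for the ConvTranspose2d/IntTransposedConv2d wrapper."""

    def quantize_weights(self):
        # The real intification lives in intconv2d.py; here we only flag the mode.
        self.int_mode = True


def replace_modules(module: nn.Module) -> None:
    # Recursively swap plain ConvTranspose2d children, mirroring replace_conv2d_modules().
    for name, child in module.named_children():
        if type(child).__name__ == "ConvTranspose2d":
            new = IntReadyConvTranspose2d(
                child.in_channels,
                child.out_channels,
                kernel_size=child.kernel_size,
                stride=child.stride,
                padding=child.padding,
                output_padding=child.output_padding,
                bias=child.bias is not None,
            )
            new.load_state_dict(child.state_dict())
            setattr(module, name, new)
        else:
            replace_modules(child)  # watch out: recursive call, as noted in the diff


def quantize_weights(model: nn.Module) -> None:
    # Second pass, matching the @staticmethod in the diff above.
    for m in model.modules():
        if type(m).__name__ == "IntReadyConvTranspose2d":
            m.quantize_weights()


toy = nn.Sequential(nn.Conv2d(3, 8, 3), nn.ConvTranspose2d(8, 3, 2, stride=2))
replace_modules(toy)
quantize_weights(toy)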
compressai_vision/model_wrappers/intconv2d.py

Lines changed: 44 additions & 29 deletions

@@ -39,9 +39,15 @@ class IntConv2d(torch.nn.Conv2d):
     def __init__(self, *args, **kwargs) -> None:
         _nkwargs = copy.deepcopy(kwargs)
 
-        del _nkwargs["training"]
-        del _nkwargs["transposed"]
-        del _nkwargs["output_padding"]
+        if _nkwargs.get("training") is not None:
+            del _nkwargs["training"]
+
+        if _nkwargs.get("transposed") is not None:
+            del _nkwargs["transposed"]
+
+        if _nkwargs.get("output_padding") is not None:
+            del _nkwargs["output_padding"]
+
         for name in kwargs.keys():
             if name.startswith("_"):
                 del _nkwargs[name]
@@ -67,7 +73,7 @@ def quantize_weights(self):
         )
         _precision = 2 ** (23 + 1)
 
-        ###### REFERENCE FROM VCMRS ######
+        ###### ADOPT VCMRS IMPLEMENTATION ######
         # sf const
         sf_const = 48
 
@@ -94,14 +100,14 @@ def quantize_weights(self):
             self.bias.requires_grad = False  # Just make sure
             self.bias.zero_()
 
-        ###### END OF REFERENCE FROM VCMRS ######
+        ###### END OF THE REFERENCE IMPLEMENTATION OF THE INT CONVS IN VCMRS ######
 
     def integer_conv2d(self, x: torch.Tensor):
         _dtype = x.dtype
         _cudnn_enabled = torch.backends.cudnn.enabled
         torch.backends.cudnn.enabled = False
 
-        ###### REFERENCE FROM VCMRS ######
+        ###### ADOPT VCMRS IMPLEMENTATION ######
         # Calculate factor
         fx = 1
 
@@ -124,15 +130,15 @@ def integer_conv2d(self, x: torch.Tensor):
         )
 
         # x should be all integers
-        out_x = out_x / (fx * self.fw.view(-1, 1, 1)).float()
+        out_x = out_x / (fx * self.fw.to(out_x.device).view(-1, 1, 1)).float()
 
         # apply bias in float format
         out_x = (
-            (out_x.permute(0, 2, 3, 1) + self.float_bias)
+            (out_x.permute(0, 2, 3, 1) + self.float_bias.to(out_x.device))
             .permute(0, 3, 1, 2)
             .contiguous()
         )
-        ###### REFERENCE FROM VCMRS ######
+        ###### END OF THE REFERENCE IMPLEMENTATION OF THE INT CONVS IN VCMRS ######
         torch.backends.cudnn.enabled = _cudnn_enabled
 
         return out_x.to(_dtype)
@@ -150,12 +156,15 @@ def conv2d(self, x: torch.Tensor):
 
 
 class IntTransposedConv2d(torch.nn.ConvTranspose2d):
-    def __init__(self, *args, **kwarg) -> None:
+    def __init__(self, *args, **kwargs) -> None:
         _nkwargs = copy.deepcopy(kwargs)
 
-        del _nkwargs["training"]
-        del _nkwargs["transposed"]
-        del _nkwargs["output_padding"]
+        if _nkwargs.get("training") is not None:
+            del _nkwargs["training"]
+
+        if _nkwargs.get("transposed") is not None:
+            del _nkwargs["transposed"]
+
         for name in kwargs.keys():
             if name.startswith("_"):
                 del _nkwargs[name]
@@ -164,7 +173,7 @@ def __init__(self, *args, **kwarg) -> None:
         self.initified_weight_mode = False
 
     # prepare quantized weights
-    def quantize(self):
+    def quantize_weights(self):
         self.initified_weight_mode = True
 
         if self.bias is None:
@@ -182,22 +191,21 @@ def quantize(self):
         )
         _precision = 2 ** (23 + 1)
 
-        ###### REFERENCE FROM VCMRS ######
-        #sf const
+        ###### ADOPT VCMRS IMPLEMENTATION ######
+        # sf const
         sf_const = 48
 
-        #N = np.prod(self.weight.shape[1:])
-        N = np.prod(self.weight.shape) / self.weight.shape[1] # (in, out, kH, kW)
+        N = np.prod(self.weight.shape) / self.weight.shape[1]  # (in, out, kH, kW)
         self.N = N
         self.factor = np.sqrt(_precision)
-        #self.sf = 1/6 #precision bits allocation factor
+        # self.sf = 1/6  # precision bits allocation factor
         self.sf = np.sqrt(sf_const / N)
 
         # perform the calculation on CPU to stabilize it
         self.w_sum = self.weight.cpu().abs().sum(axis=[0, 2, 3]).to(self.weight.device)
-        self.w_sum[self.w_sum == 0] = 1 # prevent divide by 0
+        self.w_sum[self.w_sum == 0] = 1  # prevent divide by 0
 
-        self.fw = (self.factor / self.sf - np.sqrt(N / 12) * 5) / self.w_sum
+        self.fw = (self.factor / self.sf - np.sqrt(N / 12) * 5) / self.w_sum
 
         # intify weights
         self.weight.requires_grad = False  # Just make sure
@@ -210,14 +218,14 @@ def quantize(self):
             self.bias.requires_grad = False  # Just make sure
             self.bias.zero_()
 
-        ###### END OF REFERENCE FROM VCMRS ######
+        ###### END OF THE REFERENCE IMPLEMENTATION OF THE INT CONVS IN VCMRS ######
 
     def integer_transposeconv2d(self, x: torch.Tensor):
         _dtype = x.dtype
         _cudnn_enabled = torch.backends.cudnn.enabled
         torch.backends.cudnn.enabled = False
 
-        ###### REFERENCE FROM VCMRS ######
+        ###### ADOPT VCMRS IMPLEMENTATION ######
         # Calculate factor
         fx = 1
 
@@ -227,17 +235,24 @@ def integer_transposeconv2d(self, x: torch.Tensor):
             fx = (self.factor * self.sf - 0.5) / x_max
 
         # intify x
-        x = torch.round(fx * x)
-        x = super().forward(x)
+        out_x = torch.round(fx * x)
+        out_x = super().forward(out_x)
 
         # x should be all integers
-        x /= fx * self.fw.view(-1, 1, 1)
-        x = x.float()
+        out_x = out_x / (fx * self.fw.to(out_x.device).view(-1, 1, 1))
+        out_x = out_x.float()
 
         # apply bias in float format
-        x = (x.permute(0, 2, 3, 1) + self.float_bias).permute(0, 3, 1, 2).contiguous()
+        out_x = (
+            (out_x.permute(0, 2, 3, 1) + self.float_bias.to(out_x.device))
+            .permute(0, 3, 1, 2)
+            .contiguous()
+        )
 
-        ###### REFERENCE FROM VCMRS ######
+        ###### END OF THE REFERENCE IMPLEMENTATION OF THE INT CONVS IN VCMRS ######
         torch.backends.cudnn.enabled = _cudnn_enabled
 
         return out_x.to(_dtype)
+
+    def transposedconv2d(self, x: torch.Tensor):
+        return super().forward(x)

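To make the scaling used by quantize_weights() and integer_transposeconv2d() above easier to follow, here is a small numeric sketch of the factor arithmetic. The layer shape and the w_sum and x_max values are illustrative assumptions only; the formulas themselves restate the ones in the diff.

import numpy as np

# Hypothetical transposed-conv weight of shape (in=8, out=4, kH=3, kW=3).
in_ch, out_ch, kH, kW = 8, 4, 3, 3

_precision = 2 ** (23 + 1)   # largest contiguous integer range representable in float32
sf_const = 48

N = (in_ch * out_ch * kH * kW) / out_ch   # np.prod(shape) / shape[1] for (in, out, kH, kW)
factor = np.sqrt(_precision)              # 4096.0
sf = np.sqrt(sf_const / N)                # precision-bits allocation between inputs and weights

w_sum = 2.0                               # per-output-channel sum of |weight| (illustrative)
fw = (factor / sf - np.sqrt(N / 12) * 5) / w_sum   # weight scale, as in quantize_weights()

x_max = 3.5                               # max |x| observed at the input (illustrative)
fx = (factor * sf - 0.5) / x_max          # input scale, as in integer_transposeconv2d()

# The integer path rounds fx * x, convolves it with the intified weights, rescales the
# result by 1 / (fx * fw) per output channel, and finally adds the original float bias.
print(f"N={N:.0f}, factor={factor:.0f}, sf={sf:.3f}, fw={fw:.1f}, fx={fx:.1f}")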
compressai_vision/model_wrappers/jde.py

Lines changed: 1 addition & 0 deletions

@@ -31,6 +31,7 @@
 from pathlib import Path
 from typing import Dict, List
 
+import jde
 import torch
 from jde.models import Darknet
 from jde.tracker import matching
