Commit 336c7c7

Add weight initialization
1 parent 236a0f2 commit 336c7c7

6 files changed: +83 -17 lines


CIFAR10_code/nets/AlexNet.py

Lines changed: 16 additions & 3 deletions
@@ -7,7 +7,7 @@

 # define the 2012 AlexNet
 class AlexNet(nn.Module):
-    def __init__(self,num_classes=10):
+    def __init__(self,num_classes=10, init_weights=True):
         super(AlexNet,self).__init__()
         # five conv layers, input 32 * 32 * 3
         self.conv1 = nn.Sequential(
@@ -43,7 +43,8 @@ def __init__(self,num_classes=10):
             nn.ReLU(),
             nn.Linear(84,num_classes)
         )
-
+        if init_weights:
+            self._initialize_weights()
     def forward(self,x):
         x = self.conv1(x)
         x = self.conv2(x)
@@ -53,7 +54,19 @@ def forward(self,x):
         x = x.view(x.size()[0],-1)
         x = self.fc(x)
         return x
-
+
+    def _initialize_weights(self):
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
+                if m.bias is not None:
+                    nn.init.constant_(m.bias, 0)
+            elif isinstance(m, nn.BatchNorm2d):
+                nn.init.constant_(m.weight, 1)
+                nn.init.constant_(m.bias, 0)
+            elif isinstance(m, nn.Linear):
+                nn.init.normal_(m.weight, 0, 0.01)
+                nn.init.constant_(m.bias, 0)
 def test():
     net = AlexNet()
     x = torch.randn(2,3,32,32)
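
A quick check of what mode='fan_out', nonlinearity='relu' means for these conv layers: kaiming_normal_ draws from N(0, 2 / fan_out), where fan_out = out_channels * kernel_h * kernel_w for a Conv2d. A minimal standalone sketch (illustrative, not part of the commit):

import math
import torch.nn as nn

conv = nn.Conv2d(3, 96, kernel_size=3)
nn.init.kaiming_normal_(conv.weight, mode='fan_out', nonlinearity='relu')

# expected std = sqrt(2 / fan_out) with fan_out = 96 * 3 * 3
expected_std = math.sqrt(2.0 / (96 * 3 * 3))
print(expected_std, conv.weight.std().item())  # the two values should roughly agree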

CIFAR10_code/nets/DenseNet.py

Lines changed: 12 additions & 5 deletions
@@ -72,7 +72,7 @@ class DenseNet(nn.Module):
     ->(384, 1, 1) -> [Linear] -> (10)

     """
-    def __init__(self, num_blocks, growth_rate=12, reduction=0.5, num_classes=10):
+    def __init__(self, num_blocks, growth_rate=12, reduction=0.5, num_classes=10, init_weights=True):
         super(DenseNet, self).__init__()
         self.growth_rate = growth_rate
         self.reduction = reduction
@@ -91,7 +91,8 @@ def __init__(self, num_blocks, growth_rate=12, reduction=0.5, num_classes=10):
         )
         self.classifier = nn.Linear(num_channels, num_classes)

-        self._initialize_weight()
+        if init_weights:
+            self._initialize_weights()

     def _make_dense_layer(self, in_channels, nblock, transition=True):
         layers = []
@@ -104,12 +105,18 @@ def _make_dense_layer(self, in_channels, nblock, transition=True):
             layers += [Transition(in_channels, out_channels)]
         return nn.Sequential(*layers), out_channels

-    def _initialize_weight(self):
+    def _initialize_weights(self):
         for m in self.modules():
             if isinstance(m, nn.Conv2d):
-                nn.init.kaiming_normal_(m.weight.data)
+                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                 if m.bias is not None:
-                    m.bias.data.zero_()
+                    nn.init.constant_(m.bias, 0)
+            elif isinstance(m, nn.BatchNorm2d):
+                nn.init.constant_(m.weight, 1)
+                nn.init.constant_(m.bias, 0)
+            elif isinstance(m, nn.Linear):
+                nn.init.normal_(m.weight, 0, 0.01)
+                nn.init.constant_(m.bias, 0)

     def forward(self, x):
         out = self.features(x)
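
Besides renaming _initialize_weight to _initialize_weights, this hunk changes the conv init in two ways: it acts on m.weight instead of m.weight.data (the nn.init.*_ functions are already in-place and run under no_grad, so going through .data is unnecessary), and it passes mode='fan_out' instead of the default fan_in, scaling the variance by outgoing rather than incoming connections. A small comparison sketch (illustrative, not part of the commit):

import torch.nn as nn

a = nn.Conv2d(24, 12, kernel_size=3)
b = nn.Conv2d(24, 12, kernel_size=3)
nn.init.kaiming_normal_(a.weight)                                        # fan_in  = 24 * 3 * 3 = 216
nn.init.kaiming_normal_(b.weight, mode='fan_out', nonlinearity='relu')   # fan_out = 12 * 3 * 3 = 108
print(a.weight.std().item(), b.weight.std().item())  # b's std is about sqrt(2) times a's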

CIFAR10_code/nets/LeNet-5.py

Lines changed: 15 additions & 2 deletions
@@ -5,7 +5,7 @@
 import torch.nn as nn

 class LeNet5(nn.Module):
-    def __init__(self, num_classes = 10):
+    def __init__(self, num_classes = 10, init_weights=True):
         super(LeNet5,self).__init__()
         self.conv1 = nn.Sequential(
             # input 32x32x3 -> 28x28x6, (32-5)/1 + 1 = 28
@@ -30,7 +30,8 @@ def __init__(self, num_classes = 10):
             nn.ReLU(),
             nn.Linear(84,num_classes)
         )
-
+        if init_weights:
+            self._initialize_weights()
     def forward(self,x):
         x = self.conv1(x)
         x = self.conv2(x)
@@ -39,6 +40,18 @@ def forward(self,x):
         x = self.fc(x)
         return x

+    def _initialize_weights(self):
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
+                if m.bias is not None:
+                    nn.init.constant_(m.bias, 0)
+            elif isinstance(m, nn.BatchNorm2d):
+                nn.init.constant_(m.weight, 1)
+                nn.init.constant_(m.bias, 0)
+            elif isinstance(m, nn.Linear):
+                nn.init.normal_(m.weight, 0, 0.01)
+                nn.init.constant_(m.bias, 0)

 def test():
     net = LeNet5()
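
The init_weights=True default keeps existing call sites like test() working unchanged; passing False makes sense when the random init would be overwritten immediately anyway, e.g. when restoring a checkpoint. A hedged usage sketch (the checkpoint filename is hypothetical):

import torch

net = LeNet5(init_weights=False)            # skip the random re-init
state = torch.load('lenet5_cifar10.pth')    # hypothetical checkpoint file
net.load_state_dict(state)
net.eval()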

CIFAR10_code/nets/MobileNetv1.py

Lines changed: 5 additions & 2 deletions
@@ -29,7 +29,7 @@ class MobileNet(nn.Module):
     # (128,2) means conv channel=128, conv stride=2, by default conv stride=1
     cfg = [64,(128,2),128,(256,2),256,(512,2),512,512,512,512,512,(1024,2),1024]

-    def __init__(self, num_classes=10,alpha=1.0,beta=1.0):
+    def __init__(self, num_classes=10,alpha=1.0,beta=1.0,init_weights=True):
         super(MobileNet,self).__init__()
         self.conv1 = nn.Sequential(
             nn.Conv2d(3,32,kernel_size=3,stride=1,bias=False),
@@ -49,6 +49,9 @@ def _make_layers(self, in_channels):
             in_channels = out_channels
         return nn.Sequential(*layers)

+        if init_weights:
+            self._initialize_weights()
+
     def forward(self,x):
         x = self.conv1(x)
         x = self.layers(x)
@@ -57,7 +60,7 @@ def forward(self,x):
         x = self.linear(x)
         return x

-    def init_weight(self):
+    def _initialize_weights(self):
         for w in self.modules():
             if isinstance(w, nn.Conv2d):
                 nn.init.kaiming_normal_(w.weight, mode='fan_out')
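
Note that in this file the new if init_weights: block is inserted into _make_layers after its return statement, so it is unreachable (and init_weights is only a parameter of __init__ anyway) — the renamed _initialize_weights is still never called. The presumable intent is to invoke it at the end of __init__; a minimal sketch of that pattern with a hypothetical stand-in module:

import torch.nn as nn

class TinyNet(nn.Module):  # hypothetical stand-in, not MobileNet itself
    def __init__(self, init_weights=True):
        super(TinyNet, self).__init__()
        self.conv = nn.Conv2d(3, 32, kernel_size=3, bias=False)
        if init_weights:                # init_weights is in scope here,
            self._initialize_weights()  # and the call runs once construction is done

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out')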

CIFAR10_code/nets/ResNet.py

Lines changed: 18 additions & 2 deletions
@@ -79,7 +79,7 @@ class ResNet(nn.Module):
     -> (16, 16, 128) -> [Res3] -> (8, 8, 256) ->[Res4] -> (4, 4, 512) -> [AvgPool]
     -> (1, 1, 512) -> [Reshape] -> (512) -> [Linear] -> (10)
     """
-    def __init__(self, block, num_blocks, num_classes=10, verbose = False):
+    def __init__(self, block, num_blocks, num_classes=10, verbose = False, init_weights=True):
         super(ResNet, self).__init__()
         self.verbose = verbose
         self.in_channels = 64
@@ -97,7 +97,11 @@ def __init__(self, block, num_blocks, num_classes=10, verbose = False):
         # so a 4 x 4 average pooling is used here
         self.avg_pool = nn.AvgPool2d(kernel_size=4)
         self.classifer = nn.Linear(512 * block.expansion, num_classes)
-
+
+        if init_weights:
+            self._initialize_weights()
+
+
     def _make_layer(self, block, out_channels, num_blocks, stride):
         # the first block needs to downsample
         strides = [stride] + [1] * (num_blocks - 1)
@@ -130,6 +134,18 @@ def forward(self, x):
         out = self.classifer(out)
         return out

+    def _initialize_weights(self):
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
+                if m.bias is not None:
+                    nn.init.constant_(m.bias, 0)
+            elif isinstance(m, nn.BatchNorm2d):
+                nn.init.constant_(m.weight, 1)
+                nn.init.constant_(m.bias, 0)
+            elif isinstance(m, nn.Linear):
+                nn.init.normal_(m.weight, 0, 0.01)
+                nn.init.constant_(m.bias, 0)
 def ResNet18(verbose=False):
     return ResNet(BasicBlock, [2,2,2,2],verbose=verbose)
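
One refinement this generic loop leaves out, common in ResNet recipes (it is what torchvision's zero_init_residual flag does): setting the last BatchNorm weight of each residual block to 0 so every block starts out as an identity mapping. A hedged sketch, assuming this repo's BasicBlock names its second BatchNorm bn2:

import torch.nn as nn

def zero_init_residual(model, block_type, last_bn_name='bn2'):
    # hypothetical helper: zero the final BatchNorm gain of each residual
    # block so the block initially contributes nothing to the sum
    for m in model.modules():
        if isinstance(m, block_type):
            nn.init.constant_(getattr(m, last_bn_name).weight, 0)

This would be called after the generic init, e.g. zero_init_residual(net, BasicBlock).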

CIFAR10_code/nets/VGG.py

Lines changed: 17 additions & 3 deletions
@@ -13,7 +13,7 @@
 }

 class VGG(nn.Module):
-    def __init__(self, vggname = 'VGG16',num_classes=10):
+    def __init__(self, vggname = 'VGG16',num_classes=10, init_weights=True):
         super(VGG,self).__init__()
         self.features = self._make_layers(cfg[vggname])
         self.classifier = nn.Linear(512,num_classes)
@@ -31,13 +31,27 @@ def _make_layers(self,cfg):
             in_channels = x
         layers += [nn.AvgPool2d(kernel_size=1,stride=1)]
         return nn.Sequential(*layers)
-
+        if init_weights:
+            self._initialize_weight()
     def forward(self,x):
         x = self.features(x)
         x = x.view(x.size()[0],-1)
         x = self.classifier(x)
         return x
-
+    # initialize the parameters
+    def _initialize_weight(self):
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                # xavier is used in VGG's paper
+                nn.init.xavier_normal_(m.weight.data)
+                if m.bias is not None:
+                    m.bias.data.zero_()
+            elif isinstance(m, nn.BatchNorm2d):
+                m.weight.data.fill_(1)
+                m.bias.data.zero_()
+            elif isinstance(m, nn.Linear):
+                m.weight.data.normal_(0, 0.01)
+                m.bias.data.zero_()
 def test():
     net = VGG('VGG19')
     x = torch.randn(2,3,32,32)
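
As in MobileNetv1.py, the if init_weights: call here lands after the return in _make_layers, so _initialize_weight() is never actually reached; it would need to move to the end of __init__, after self.classifier is assigned. The Xavier choice itself behaves differently from the Kaiming init used in the other files: xavier_normal_ draws from N(0, 2 / (fan_in + fan_out)) with no ReLU gain. A quick standalone check (illustrative, not part of the commit):

import math
import torch.nn as nn

lin = nn.Linear(512, 10)
nn.init.xavier_normal_(lin.weight)
expected_std = math.sqrt(2.0 / (512 + 10))
print(expected_std, lin.weight.std().item())  # should roughly agree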
