'''
ResNet in PyTorch.

Reference:
[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
    Deep Residual Learning for Image Recognition. arXiv:1512.03385
'''
import torch
import torch.nn as nn
class BasicBlock(nn.Module):
    """
    The basic residual block used by the shallower networks (ResNet-18/34).
    It performs no channel compression, so expansion = 1.
    """
    expansion = 1
    def __init__(self, in_channels, out_channels, stride=1):
        super(BasicBlock, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(out_channels)
        )
        # If the input and output dimensions differ, use a 1x1 convolution
        # on the shortcut path to match them.
        self.shortcut = nn.Sequential()
        if stride != 1 or in_channels != self.expansion * out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, self.expansion * out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion * out_channels),
            )
    def forward(self, x):
        out = self.features(x)
        out += self.shortcut(x)
        out = torch.relu(out)
        return out

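# A quick shape sanity check for BasicBlock (an illustrative sketch; the
# 32x32 input size matches the CIFAR-10 setting used below):
#   block = BasicBlock(64, 128, stride=2)
#   y = block(torch.randn(1, 64, 32, 32))  # y.shape == (1, 128, 16, 16)
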
class Bottleneck(nn.Module):
    """
    The bottleneck block used by the deeper networks (ResNet-50/101/152). The
    paper notes it has similar computational complexity to the basic block
    while saving considerable resources.
    zip_channels: the compressed width; the block's final output width is
    expansion * zip_channels. In ResNet-50/101/152 the third conv layer has
    4x the channels of the second, hence expansion = 4.
    """
    expansion = 4

    def __init__(self, in_channels, zip_channels, stride=1):
        super(Bottleneck, self).__init__()
        out_channels = self.expansion * zip_channels
        self.features = nn.Sequential(
            nn.Conv2d(in_channels, zip_channels, kernel_size=1, bias=False),
            nn.BatchNorm2d(zip_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(zip_channels, zip_channels, kernel_size=3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(zip_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(zip_channels, out_channels, kernel_size=1, bias=False),
            nn.BatchNorm2d(out_channels)
        )
        self.shortcut = nn.Sequential()
        if stride != 1 or in_channels != out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels)
            )

    def forward(self, x):
        out = self.features(x)
        out += self.shortcut(x)
        out = torch.relu(out)
        return out

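# An illustrative sketch of the bottleneck's channel expansion:
# Bottleneck(64, 64) compresses to 64 channels internally and emits
# expansion * 64 = 256 channels:
#   block = Bottleneck(64, 64, stride=1)
#   y = block(torch.randn(1, 64, 32, 32))  # y.shape == (1, 256, 32, 32)
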
class ResNet(nn.Module):
    """
    Every ResNet variant shares the same skeleton: one stem convolution for
    feature extraction followed by four residual stages; the variants differ
    only in the depth of each stage. For CIFAR-10 with the BasicBlock
    variants, the feature map sizes evolve as follows:
    (32, 32, 3) -> [Conv2d] -> (32, 32, 64) -> [Res1] -> (32, 32, 64) -> [Res2]
    -> (16, 16, 128) -> [Res3] -> (8, 8, 256) -> [Res4] -> (4, 4, 512) -> [AvgPool]
    -> (1, 1, 512) -> [Reshape] -> (512) -> [Linear] -> (10)
    """
    def __init__(self, block, num_blocks, num_classes=10, verbose=False):
        super(ResNet, self).__init__()
        self.verbose = verbose
        self.in_channels = 64
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True)
        )
        # Build the conv2_x, conv3_x, conv4_x, and conv5_x stages from the
        # paper's architecture table with _make_layer.
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        # After the stages above, a CIFAR-10 feature map is
        # 4 x 4 x (512 * expansion), so a 4 x 4 average pooling is used here.
        self.avg_pool = nn.AvgPool2d(kernel_size=4)
        self.classifier = nn.Linear(512 * block.expansion, num_classes)

    def _make_layer(self, block, out_channels, num_blocks, stride):
        # Only the first block of a stage downsamples; the rest keep stride 1.
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_channels, out_channels, stride))
            # Bottleneck blocks compress and then re-expand their channels,
            # so the input width of the next block must track the expansion.
            self.in_channels = out_channels * block.expansion
        return nn.Sequential(*layers)

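    # Illustrative trace: for ResNet-50's conv3_x stage,
    # _make_layer(Bottleneck, 128, 4, stride=2) builds blocks with strides
    # [2, 1, 1, 1], so only the first block halves the spatial size.
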
    def forward(self, x):
        out = self.features(x)
        if self.verbose:
            print('block 1 output: {}'.format(out.shape))
        out = self.layer1(out)
        if self.verbose:
            print('block 2 output: {}'.format(out.shape))
        out = self.layer2(out)
        if self.verbose:
            print('block 3 output: {}'.format(out.shape))
        out = self.layer3(out)
        if self.verbose:
            print('block 4 output: {}'.format(out.shape))
        out = self.layer4(out)
        if self.verbose:
            print('block 5 output: {}'.format(out.shape))
        out = self.avg_pool(out)
        out = out.view(out.size(0), -1)
        out = self.classifier(out)
        return out

def ResNet18(verbose=False):
    return ResNet(BasicBlock, [2, 2, 2, 2], verbose=verbose)

def ResNet34(verbose=False):
    return ResNet(BasicBlock, [3, 4, 6, 3], verbose=verbose)

def ResNet50(verbose=False):
    return ResNet(Bottleneck, [3, 4, 6, 3], verbose=verbose)

def ResNet101(verbose=False):
    return ResNet(Bottleneck, [3, 4, 23, 3], verbose=verbose)

def ResNet152(verbose=False):
    return ResNet(Bottleneck, [3, 8, 36, 3], verbose=verbose)

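# Usage sketch: with verbose=True the forward pass prints each stage's output
# shape, e.g. ResNet50(verbose=True) on a (1, 3, 32, 32) input reports
# "block 2 output: torch.Size([1, 256, 32, 32])" after the first stage.
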
def test():
    net = ResNet34()
    x = torch.randn(2, 3, 32, 32)
    y = net(x)
    print(y.size())
    # torchinfo is an optional dependency, used only for this summary.
    from torchinfo import summary
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    net = net.to(device)
    summary(net, (2, 3, 32, 32))

if __name__ == '__main__':
    test()