 import numpy as np
 import time
 import os
+import math

 import cProfile, pstats, StringIO

 import paddle
 import paddle.fluid as fluid
 import paddle.fluid.core as core
 import paddle.fluid.profiler as profiler
-# from recordio_converter import imagenet_train, imagenet_test
 from imagenet_reader import train, val

+train_parameters = {
+    "input_size": [3, 224, 224],
+    "input_mean": [0.485, 0.456, 0.406],
+    "input_std": [0.229, 0.224, 0.225],
+    "learning_strategy": {
+        "name": "piecewise_decay",
+        "batch_size": 256,
+        "epochs": [30, 60, 90],
+        "steps": [0.1, 0.01, 0.001, 0.0001]
+    }
+}
+
+
+class ResNet():
+    def __init__(self, layers=50, is_train=True):
+        self.params = train_parameters
+        self.layers = layers
+        self.is_train = is_train
+
+    def net(self, input, class_dim=1000):
+        layers = self.layers
+        supported_layers = [50, 101, 152]
+        assert layers in supported_layers, \
+            "supported layers are {} but input layer is {}".format(supported_layers, layers)
+
+        if layers == 50:
+            depth = [3, 4, 6, 3]
+        elif layers == 101:
+            depth = [3, 4, 23, 3]
+        elif layers == 152:
+            depth = [3, 8, 36, 3]
+        num_filters = [64, 128, 256, 512]
+
+        conv = self.conv_bn_layer(
+            input=input, num_filters=64, filter_size=7, stride=2, act='relu')
+        conv = fluid.layers.pool2d(
+            input=conv,
+            pool_size=3,
+            pool_stride=2,
+            pool_padding=1,
+            pool_type='max')
+
+        for block in range(len(depth)):
+            for i in range(depth[block]):
+                conv = self.bottleneck_block(
+                    input=conv,
+                    num_filters=num_filters[block],
+                    stride=2 if i == 0 and block != 0 else 1)
+
+        pool = fluid.layers.pool2d(
+            input=conv, pool_size=7, pool_type='avg', global_pooling=True)
+        stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
+        out = fluid.layers.fc(input=pool,
+                              size=class_dim,
+                              act='softmax',
+                              param_attr=fluid.param_attr.ParamAttr(
+                                  initializer=fluid.initializer.Uniform(-stdv,
+                                                                        stdv)))
+        return out
+
+    def conv_bn_layer(self,
+                      input,
+                      num_filters,
+                      filter_size,
+                      stride=1,
+                      groups=1,
+                      act=None):
+        conv = fluid.layers.conv2d(
+            input=input,
+            num_filters=num_filters,
+            filter_size=filter_size,
+            stride=stride,
+            padding=(filter_size - 1) // 2,
+            groups=groups,
+            act=None,
+            bias_attr=False)
+        return fluid.layers.batch_norm(
+            input=conv, act=act, is_test=not self.is_train)
+
+    def shortcut(self, input, ch_out, stride):
+        ch_in = input.shape[1]
+        if ch_in != ch_out or stride != 1:
+            return self.conv_bn_layer(input, ch_out, 1, stride)
+        else:
+            return input

-def conv_bn_layer(input,
-                  ch_out,
-                  filter_size,
-                  stride,
-                  padding,
-                  act='relu',
-                  is_train=True):
-    conv1 = fluid.layers.conv2d(
-        input=input,
-        filter_size=filter_size,
-        num_filters=ch_out,
-        stride=stride,
-        padding=padding,
-        act=None,
-        bias_attr=False)
-    return fluid.layers.batch_norm(input=conv1, act=act, is_test=not is_train)
-
-
-def shortcut(input, ch_out, stride, is_train=True):
-    ch_in = input.shape[1]  # if args.data_format == 'NCHW' else input.shape[-1]
-    if ch_in != ch_out:
-        return conv_bn_layer(
-            input, ch_out, 1, stride, 0, None, is_train=is_train)
-    else:
-        return input
-
-
-def basicblock(input, ch_out, stride, is_train=True):
-    short = shortcut(input, ch_out, stride, is_train=is_train)
-    conv1 = conv_bn_layer(input, ch_out, 3, stride, 1, is_train=is_train)
-    conv2 = conv_bn_layer(conv1, ch_out, 3, 1, 1, act=None, is_train=is_train)
-    return fluid.layers.elementwise_add(x=short, y=conv2, act='relu')
-
-
-def bottleneck(input, ch_out, stride, is_train=True):
-    short = shortcut(input, ch_out * 4, stride, is_train=is_train)
-    conv1 = conv_bn_layer(input, ch_out, 1, stride, 0, is_train=is_train)
-    conv2 = conv_bn_layer(conv1, ch_out, 3, 1, 1, is_train=is_train)
-    conv3 = conv_bn_layer(
-        conv2, ch_out * 4, 1, 1, 0, act=None, is_train=is_train)
-    return fluid.layers.elementwise_add(x=short, y=conv3, act='relu')
-
-
-def layer_warp(block_func, input, ch_out, count, stride):
-    res_out = block_func(input, ch_out, stride)
-    for i in range(1, count):
-        res_out = block_func(res_out, ch_out, 1)
-    return res_out
-
+    def bottleneck_block(self, input, num_filters, stride):
+        conv0 = self.conv_bn_layer(
+            input=input, num_filters=num_filters, filter_size=1, act='relu')
+        conv1 = self.conv_bn_layer(
+            input=conv0,
+            num_filters=num_filters,
+            filter_size=3,
+            stride=stride,
+            act='relu')
+        conv2 = self.conv_bn_layer(
+            input=conv1, num_filters=num_filters * 4, filter_size=1, act=None)

-def resnet_imagenet(input,
-                    class_dim,
-                    depth=50,
-                    data_format='NCHW',
-                    is_train=True):
+        short = self.shortcut(input, num_filters * 4, stride)

-    cfg = {
-        18: ([2, 2, 2, 1], basicblock),
-        34: ([3, 4, 6, 3], basicblock),
-        50: ([3, 4, 6, 3], bottleneck),
-        101: ([3, 4, 23, 3], bottleneck),
-        152: ([3, 8, 36, 3], bottleneck)
-    }
-    stages, block_func = cfg[depth]
-    conv1 = conv_bn_layer(input, ch_out=64, filter_size=7, stride=2, padding=3)
-    pool1 = fluid.layers.pool2d(
-        input=conv1, pool_type='avg', pool_size=3, pool_stride=2)
-    res1 = layer_warp(block_func, pool1, 64, stages[0], 1)
-    res2 = layer_warp(block_func, res1, 128, stages[1], 2)
-    res3 = layer_warp(block_func, res2, 256, stages[2], 2)
-    res4 = layer_warp(block_func, res3, 512, stages[3], 2)
-    pool2 = fluid.layers.pool2d(
-        input=res4,
-        pool_size=7,
-        pool_type='avg',
-        pool_stride=1,
-        global_pooling=True)
-    out = fluid.layers.fc(input=pool2, size=class_dim, act='softmax')
-    return out
-
-
-def resnet_cifar10(input, class_dim, depth=32, data_format='NCHW'):
-    assert (depth - 2) % 6 == 0
-
-    n = (depth - 2) // 6
-
-    conv1 = conv_bn_layer(
-        input=input, ch_out=16, filter_size=3, stride=1, padding=1)
-    res1 = layer_warp(basicblock, conv1, 16, n, 1)
-    res2 = layer_warp(basicblock, res1, 32, n, 2)
-    res3 = layer_warp(basicblock, res2, 64, n, 2)
-    pool = fluid.layers.pool2d(
-        input=res3, pool_size=8, pool_type='avg', pool_stride=1)
-    out = fluid.layers.fc(input=pool, size=class_dim, act='softmax')
-    return out
+        return fluid.layers.elementwise_add(x=short, y=conv2, act='relu')


 def _model_reader_dshape_classdim(args, is_train):
-    model = resnet_cifar10
+    model = None
     reader = None
-    if args.data_set == "cifar10":
-        class_dim = 10
-        if args.data_format == 'NCHW':
-            dshape = [3, 32, 32]
-        else:
-            dshape = [32, 32, 3]
-        model = resnet_cifar10
-        if is_train:
-            reader = paddle.dataset.cifar.train10()
-        else:
-            reader = paddle.dataset.cifar.test10()
-    elif args.data_set == "flowers":
+    if args.data_set == "flowers":
         class_dim = 102
         if args.data_format == 'NCHW':
             dshape = [3, 224, 224]
         else:
             dshape = [224, 224, 3]
-        model = resnet_imagenet
         if is_train:
             reader = paddle.dataset.flowers.train()
         else:
@@ -159,7 +152,6 @@ def _model_reader_dshape_classdim(args, is_train):
             dshape = [3, 224, 224]
         else:
             dshape = [224, 224, 3]
-        model = resnet_imagenet
         if not args.data_path:
             raise Exception(
                 "Must specify --data_path when training with imagenet")
@@ -173,12 +165,11 @@ def _model_reader_dshape_classdim(args, is_train):
             reader = train(xmap=False)
         else:
             reader = val(xmap=False)
-    return model, reader, dshape, class_dim
+    return reader, dshape, class_dim


 def get_model(args, is_train, main_prog, startup_prog):
-    model, reader, dshape, class_dim = _model_reader_dshape_classdim(args,
-                                                                     is_train)
+    reader, dshape, class_dim = _model_reader_dshape_classdim(args, is_train)

     pyreader = None
     trainer_count = int(os.getenv("PADDLE_TRAINERS"))
@@ -198,7 +189,8 @@ def get_model(args, is_train, main_prog, startup_prog):
                 label = fluid.layers.data(
                     name='label', shape=[1], dtype='int64')

-            predict = model(input, class_dim, is_train=is_train)
+            model = ResNet(is_train=is_train)
+            predict = model.net(input, class_dim=class_dim)
             cost = fluid.layers.cross_entropy(input=predict, label=label)
             avg_cost = fluid.layers.mean(x=cost)

@@ -216,15 +208,14 @@ def get_model(args, is_train, main_prog, startup_prog):
         total_images = 1281167 / trainer_count

         step = int(total_images / args.batch_size + 1)
-        epochs = [30, 60, 80, 90]
+        epochs = [30, 60, 90]
         bd = [step * e for e in epochs]
         base_lr = args.learning_rate
         lr = []
         lr = [base_lr * (0.1**i) for i in range(len(bd) + 1)]
         optimizer = fluid.optimizer.Momentum(
-            learning_rate=base_lr,
-            #learning_rate=fluid.layers.piecewise_decay(
-            #    boundaries=bd, values=lr),
+            learning_rate=fluid.layers.piecewise_decay(
+                boundaries=bd, values=lr),
             momentum=0.9,
             regularization=fluid.regularizer.L2Decay(1e-4))
         optimizer.minimize(avg_cost)