@@ -107,44 +107,24 @@ def _init_data(self, random=True):
107
107
label = np .ones (shape = [32 , 1 ], dtype = 'int64' )
108
108
return img , label
109
109
110
- # simple_fc
111
- def check_simple_fc_convergence (self , use_cuda , use_reduce = False ):
110
def _compare_reduce_and_allreduce(self, model, use_cuda, random_data=True):
    """Check that `model` yields matching losses with and without use_reduce.

    The model is first run twice with ``use_reduce=True`` and no explicit
    ``feed_dict`` (the second time also with ``allow_op_delay=True``).
    It is then run on one batch obtained from ``_init_data`` with
    ``use_reduce=False`` and again with ``use_reduce=True``, and the
    losses returned by the two runs are compared element-wise.

    Args:
        model: Forwarded unchanged as the first argument of
            ``check_network_convergence``.
        use_cuda: Forwarded as ``use_cuda``.  When true and the core was
            not compiled with CUDA, the method returns without checking
            anything.
        random_data: Forwarded to ``_init_data`` as its only argument.
    """
    if use_cuda and not core.is_compiled_with_cuda():
        return

    self.check_network_convergence(
        model, use_cuda=use_cuda, use_reduce=True)
    self.check_network_convergence(
        model, use_cuda=use_cuda, allow_op_delay=True, use_reduce=True)

    # Both compared runs are fed the same batch so that their losses can
    # be matched against each other.
    img, label = self._init_data(random_data)

    all_reduce_first_loss, all_reduce_last_loss = self.check_network_convergence(
        model,
        feed_dict={"image": img,
                   "label": label},
        use_cuda=use_cuda,
        use_reduce=False)
    reduce_first_loss, reduce_last_loss = self.check_network_convergence(
        model,
        feed_dict={"image": img,
                   "label": label},
        use_cuda=use_cuda,
        use_reduce=True)

    # assertAlmostEqual is used instead of the deprecated alias
    # assertAlmostEquals, which was removed in Python 3.12.
    for all_reduce_loss, reduce_loss in zip(all_reduce_first_loss,
                                            reduce_first_loss):
        self.assertAlmostEqual(all_reduce_loss, reduce_loss, delta=1e-6)
    # The last-step losses are compared with a looser tolerance than the
    # first-step ones.
    for all_reduce_loss, reduce_loss in zip(all_reduce_last_loss,
                                            reduce_last_loss):
        self.assertAlmostEqual(all_reduce_loss, reduce_loss, delta=1e-4)
137
+
138
+ # simple_fc
139
def check_simple_fc_convergence(self, use_cuda, use_reduce=False):
    """Run the convergence check on ``simple_fc_net`` three times.

    The first two runs pass no ``feed_dict`` (the second one sets
    ``allow_op_delay=True``).  The third run feeds a batch obtained from
    ``_init_data()`` and forwards ``use_reduce``.

    Args:
        use_cuda: Forwarded as ``use_cuda`` to every run.  When true and
            the core was not compiled with CUDA, nothing is run.
        use_reduce: Forwarded to the third (fed) run only.
    """
    cuda_requested_but_missing = (use_cuda and
                                  not core.is_compiled_with_cuda())
    if cuda_requested_but_missing:
        return

    # Unfed runs: first with default options, then with op delay allowed.
    for extra_options in ({}, {"allow_op_delay": True}):
        self.check_network_convergence(
            simple_fc_net, use_cuda=use_cuda, **extra_options)

    image_batch, label_batch = self._init_data()
    feed = {"image": image_batch, "label": label_batch}

    self.check_network_convergence(
        simple_fc_net,
        feed_dict=feed,
        use_cuda=use_cuda,
        use_reduce=use_reduce)
157
154
158
155
def test_simple_fc (self ):
159
156
# use_cuda
@@ -162,8 +159,8 @@ def test_simple_fc(self):
162
159
163
160
def test_simple_fc_with_new_strategy(self):
    """Run the reduce/all-reduce comparison on ``simple_fc_net``.

    The comparison is invoked with ``use_cuda=True`` first and then with
    ``use_cuda=False``.
    """
    for use_cuda in (True, False):
        self._compare_reduce_and_allreduce(simple_fc_net, use_cuda)
167
164
168
165
def check_simple_fc_parallel_accuracy (self , use_cuda ):
169
166
if use_cuda and not core .is_compiled_with_cuda ():
@@ -209,39 +206,13 @@ def check_batchnorm_fc_convergence(self, use_cuda):
209
206
"label" : label },
210
207
use_cuda = use_cuda )
211
208
212
- def check_batchnorm_fc_convergence_use_reduce (self , use_cuda ):
213
- if use_cuda and not core .is_compiled_with_cuda ():
214
- return
215
- self .check_network_convergence (
216
- fc_with_batchnorm , use_cuda = use_cuda , use_reduce = True )
217
-
218
- img , label = self ._init_data ()
219
-
220
- all_reduce_first_loss , all_reduce_last_loss = self .check_network_convergence (
221
- fc_with_batchnorm ,
222
- feed_dict = {"image" : img ,
223
- "label" : label },
224
- use_cuda = use_cuda ,
225
- use_reduce = False )
226
- reduce_first_loss , reduce_last_loss = self .check_network_convergence (
227
- fc_with_batchnorm ,
228
- feed_dict = {"image" : img ,
229
- "label" : label },
230
- use_cuda = use_cuda ,
231
- use_reduce = True )
232
-
233
- for loss in zip (all_reduce_first_loss , reduce_first_loss ):
234
- self .assertAlmostEquals (loss [0 ], loss [1 ], delta = 1e-6 )
235
- for loss in zip (all_reduce_last_loss , reduce_last_loss ):
236
- self .assertAlmostEquals (loss [0 ], loss [1 ], delta = 1e-4 )
237
-
238
209
def test_batchnorm_fc(self):
    """Run ``check_batchnorm_fc_convergence`` with CUDA, then without."""
    for use_cuda in (True, False):
        self.check_batchnorm_fc_convergence(use_cuda)
241
212
242
213
def test_batchnorm_fc_with_new_strategy(self):
    """Run the reduce/all-reduce comparison on ``fc_with_batchnorm``.

    The comparison is invoked with ``use_cuda=True`` first and then with
    ``use_cuda=False``.
    """
    for use_cuda in (True, False):
        self._compare_reduce_and_allreduce(fc_with_batchnorm, use_cuda)
245
216
246
217
247
218
if __name__ == '__main__' :
0 commit comments