
Commit a3ac54b

Fix Reduce functor (#12262)
Authored by chengduo

* Fix Reduce and Gather
* Fix unit test
1 parent 6c981e7 commit a3ac54b

File tree

2 files changed: +89 -25 lines


paddle/fluid/framework/details/reduce_and_gather.h

Lines changed: 6 additions & 4 deletions
@@ -35,14 +35,16 @@ struct ReduceLoDTensor {
     PADDLE_ENFORCE(!src_tensors_.empty());
     auto &t0 = *src_tensors_[0];
     PADDLE_ENFORCE_NE(t0.numel(), 0);
+
     dst_tensor_.Resize(t0.dims());
     T *dst = dst_tensor_.mutable_data<T>(platform::CPUPlace());
-    if (dst != t0.data<T>()) {
-      std::copy(t0.data<T>(), t0.data<T>() + t0.numel(), dst);
-    }
 
-    for (size_t i = 1; i < src_tensors_.size(); ++i) {
+    for (size_t i = 0; i < src_tensors_.size(); ++i) {
       auto &t = *src_tensors_[i];
+      if (dst == t.data<T>()) {
+        continue;
+      }
+
       PADDLE_ENFORCE_EQ(t.dims(), t0.dims());
       PADDLE_ENFORCE_EQ(t.type(), t0.type());
       std::transform(t.data<T>(), t.data<T>() + t.numel(), dst, dst,

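What changed in the functor: the old code copied t0 into dst (unless dst already pointed at t0's data) and then accumulated sources starting at index 1, so if dst shared storage with any later source, that source was overwritten by the copy and t0 was effectively added twice. The fixed loop starts at index 0 and skips whichever source aliases dst. Below is a minimal standalone sketch of the same pattern, with plain float buffers standing in for LoDTensor; reduce_into is an illustrative name, not a Paddle API, and it assumes the destination aliases exactly one of the sources.

// Minimal sketch of the fixed accumulation pattern, with plain float buffers
// standing in for LoDTensor. reduce_into is an illustrative name, not a
// Paddle API; it assumes the destination aliases exactly one source buffer.
#include <algorithm>
#include <functional>
#include <iostream>
#include <vector>

void reduce_into(const std::vector<const float*>& srcs, float* dst, size_t n) {
  for (size_t i = 0; i < srcs.size(); ++i) {  // start at 0, not 1
    if (srcs[i] == dst) continue;             // skip the source that is dst itself
    std::transform(srcs[i], srcs[i] + n, dst, dst, std::plus<float>());
  }
}

int main() {
  std::vector<float> a = {1, 2, 3}, b = {4, 5, 6}, c = {7, 8, 9};
  // b doubles as the destination, mirroring dst aliasing one of the sources.
  reduce_into({a.data(), b.data(), c.data()}, b.data(), b.size());
  for (float v : b) std::cout << v << ' ';    // prints: 12 15 18
  std::cout << '\n';
  return 0;
}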
python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py

Lines changed: 83 additions & 21 deletions
@@ -102,37 +102,79 @@ def setUpClass(cls):
         fluid.recordio_writer.convert_reader_to_recordio_file(
             MNIST_RECORDIO_FILE, reader, feeder)
 
+    def _init_data(self, random=True):
+        np.random.seed(5)
+        if random:
+            img = np.random.random(size=[32, 784]).astype(np.float32)
+        else:
+            img = np.ones(shape=[32, 784], dtype='float32')
+        label = np.ones(shape=[32, 1], dtype='int64')
+        return img, label
+
+    # simple_fc
     def check_simple_fc_convergence(self, use_cuda, use_reduce=False):
         if use_cuda and not core.is_compiled_with_cuda():
             return
         self.check_network_convergence(simple_fc_net, use_cuda=use_cuda)
         self.check_network_convergence(
             simple_fc_net, use_cuda=use_cuda, allow_op_delay=True)
 
-        img = np.zeros(shape=[32, 784], dtype='float32')
-        label = np.ones(shape=[32, 1], dtype='int64')
+        img, label = self._init_data()
+
         self.check_network_convergence(
             simple_fc_net,
             feed_dict={"image": img,
                        "label": label},
             use_cuda=use_cuda,
             use_reduce=use_reduce)
 
+    def check_simple_fc_convergence_with_Reduce(self, use_cuda):
+        if use_cuda and not core.is_compiled_with_cuda():
+            return
+        self.check_network_convergence(
+            simple_fc_net, use_cuda=use_cuda, use_reduce=True)
+        self.check_network_convergence(
+            simple_fc_net,
+            use_cuda=use_cuda,
+            allow_op_delay=True,
+            use_reduce=True)
+
+        img, label = self._init_data()
+
+        all_reduce_first_loss, all_reduce_last_loss = self.check_network_convergence(
+            simple_fc_net,
+            feed_dict={"image": img,
+                       "label": label},
+            use_cuda=use_cuda,
+            use_reduce=False)
+        reduce_first_loss, reduce_last_loss = self.check_network_convergence(
+            simple_fc_net,
+            feed_dict={"image": img,
+                       "label": label},
+            use_cuda=use_cuda,
+            use_reduce=True)
+
+        for loss in zip(all_reduce_first_loss, reduce_first_loss):
+            self.assertAlmostEquals(loss[0], loss[1], delta=1e-6)
+        for loss in zip(all_reduce_last_loss, reduce_last_loss):
+            self.assertAlmostEquals(loss[0], loss[1], delta=1e-6)
+
     def test_simple_fc(self):
         # use_cuda
         self.check_simple_fc_convergence(True)
         self.check_simple_fc_convergence(False)
 
     def test_simple_fc_with_new_strategy(self):
         # use_cuda, use_reduce
-        self.check_simple_fc_convergence(True, True)
-        self.check_simple_fc_convergence(False, True)
+        self.check_simple_fc_convergence_with_Reduce(True)
+        self.check_simple_fc_convergence_with_Reduce(False)
 
-    def check_simple_fc_parallel_accuracy(self, use_cuda, use_reduce=False):
+    def check_simple_fc_parallel_accuracy(self, use_cuda):
         if use_cuda and not core.is_compiled_with_cuda():
             return
-        img = np.zeros(shape=[32, 784], dtype='float32')
-        label = np.ones(shape=[32, 1], dtype='int64')
+
+        img, label = self._init_data(random=False)
+
         single_first_loss, single_last_loss = self.check_network_convergence(
             method=simple_fc_net,
             seed=1000,
@@ -146,8 +188,7 @@ def check_simple_fc_parallel_accuracy(self, use_cuda, use_reduce=False):
             feed_dict={"image": img,
                        "label": label},
             use_cuda=use_cuda,
-            use_parallel_executor=True,
-            use_reduce=use_reduce)
+            use_parallel_executor=True)
 
         for p_f in parallel_first_loss:
             self.assertAlmostEquals(p_f, single_first_loss[0], delta=1e-6)
@@ -158,32 +199,53 @@ def test_simple_fc_parallel_accuracy(self):
         self.check_simple_fc_parallel_accuracy(True)
         self.check_simple_fc_parallel_accuracy(False)
 
-    def test_simple_fc_parallel_accuracy_with_new_strategy(self):
-        # use_cuda, use_reduce
-        self.check_simple_fc_parallel_accuracy(True, True)
-        self.check_simple_fc_parallel_accuracy(False, True)
-
-    def check_batchnorm_fc_convergence(self, use_cuda, use_reduce=False):
+    def check_batchnorm_fc_convergence(self, use_cuda):
         if use_cuda and not core.is_compiled_with_cuda():
             return
+
         self.check_network_convergence(fc_with_batchnorm, use_cuda=use_cuda)
-        img = np.zeros(shape=[32, 784], dtype='float32')
-        label = np.ones(shape=[32, 1], dtype='int64')
+
+        img, label = self._init_data()
+
+        self.check_network_convergence(
+            fc_with_batchnorm,
+            feed_dict={"image": img,
+                       "label": label},
+            use_cuda=use_cuda)
+
+    def check_batchnorm_fc_convergence_use_reduce(self, use_cuda):
+        if use_cuda and not core.is_compiled_with_cuda():
+            return
         self.check_network_convergence(
+            fc_with_batchnorm, use_cuda=use_cuda, use_reduce=True)
+
+        img, label = self._init_data()
+
+        all_reduce_first_loss, all_reduce_last_loss = self.check_network_convergence(
            fc_with_batchnorm,
            feed_dict={"image": img,
                       "label": label},
            use_cuda=use_cuda,
-           use_reduce=use_reduce)
+           use_reduce=False)
+        reduce_first_loss, reduce_last_loss = self.check_network_convergence(
+            fc_with_batchnorm,
+            feed_dict={"image": img,
+                       "label": label},
+            use_cuda=use_cuda,
+            use_reduce=True)
+
+        for loss in zip(all_reduce_first_loss, reduce_first_loss):
+            self.assertAlmostEquals(loss[0], loss[1], delta=1e-6)
+        for loss in zip(all_reduce_last_loss, reduce_last_loss):
+            self.assertAlmostEquals(loss[0], loss[1], delta=1e-4)
 
     def test_batchnorm_fc(self):
         self.check_batchnorm_fc_convergence(True)
         self.check_batchnorm_fc_convergence(False)
 
     def test_batchnorm_fc_with_new_strategy(self):
-        # use_cuda, use_reduce
-        self.check_batchnorm_fc_convergence(True, True)
-        self.check_batchnorm_fc_convergence(False, True)
+        self.check_batchnorm_fc_convergence_use_reduce(True)
+        self.check_batchnorm_fc_convergence_use_reduce(False)
 
 
 if __name__ == '__main__':

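In short, the reworked test drives the same comparison from Python: _init_data produces one seeded batch, and the new check_simple_fc_convergence_with_Reduce and check_batchnorm_fc_convergence_use_reduce helpers run that batch through the executor with use_reduce=False and use_reduce=True, asserting that the first and last losses of the two strategies agree to within 1e-6 (1e-4 for the last loss of the batch-norm model).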
0 commit comments
