
Commit aff8a26

check generated_op_
1 parent 2e5d44f commit aff8a26

3 files changed: +26, -30 lines changed

paddle/fluid/framework/details/nccl_all_reduce_op_handle.cc

Lines changed: 1 addition & 1 deletion
@@ -36,7 +36,7 @@ void NCCLAllReduceOpHandle::RunImpl() {
   // Wait input done
   for (auto *in : inputs_) {
     auto &p = static_cast<VarHandle *>(in)->place_;
-    in->generated_op_->Wait(dev_ctxes_[p]);
+    if (in->generated_op_) in->generated_op_->Wait(dev_ctxes_[p]);
   }
 
   auto &var_name = static_cast<VarHandle *>(this->inputs_[0])->name_;

paddle/fluid/framework/details/send_op_handle.cc

Lines changed: 1 addition & 1 deletion
@@ -32,7 +32,7 @@ void SendOpHandle::RunImpl() {
     if (in->DebugString() == "dummy") {  // HACK
       continue;
     }
-    in->generated_op_->Wait(dev_ctxes_[p]);
+    if (in->generated_op_) in->generated_op_->Wait(dev_ctxes_[p]);
   }
   auto &tmp_scope = local_scope_->FindVar(kLocalExecScopeName)->Get<Scope *>();
   // FIXME(wuyi): can not use RunAndRecordEvent here, for it will cause dead
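Both C++ hunks apply the same guard: an input VarHandle may have no generating operator (for example a variable supplied through feed, or one produced outside the compiled graph), in which case generated_op_ is null and the unconditional Wait() would dereference it. Below is a minimal sketch, not part of the commit, of the kind of run that reaches this path; it assumes the legacy fluid API used by the test file in this commit, and the toy network is hypothetical.

import numpy as np
import paddle.fluid as fluid

# Fed variables such as 'image' and 'label' have no upstream op inside the
# graph; these are the inputs the new null check tolerates.
image = fluid.layers.data(name='image', shape=[784], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
prediction = fluid.layers.fc(input=image, size=10, act='softmax')
cost = fluid.layers.cross_entropy(input=prediction, label=label)
loss = fluid.layers.mean(cost)
fluid.optimizer.SGD(learning_rate=1e-3).minimize(loss)

place = fluid.CUDAPlace(0)
fluid.Executor(place).run(fluid.default_startup_program())

pe = fluid.ParallelExecutor(use_cuda=True, loss_name=loss.name)
feed_dict = {
    'image': np.random.random([32, 784]).astype('float32'),
    'label': np.random.randint(0, 10, [32, 1]).astype('int64'),
}
# Before this commit, waiting on an input whose generated_op_ is null could
# crash inside the all-reduce / send op handles.
first_loss, = pe.run([loss.name], feed=feed_dict)
print(np.array(first_loss))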

python/paddle/fluid/tests/unittests/test_parallel_executor.py

Lines changed: 24 additions & 28 deletions
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import numpy
+import numpy as np
 import unittest
 
 import paddle.fluid as fluid
@@ -243,7 +243,7 @@ def run_executor(exe, feed, fetch_list, program=None):
         begin = time.time()
         first_loss, = run_executor(
             exe=exe, feed=feed_dict, fetch_list=[loss.name])
-        first_loss = numpy.array(first_loss)
+        first_loss = np.array(first_loss)
 
         for i in xrange(iter):
             run_executor(exe=exe, feed=feed_dict, fetch_list=[])
@@ -256,7 +256,7 @@ def run_executor(exe, feed, fetch_list, program=None):
         print "%.4f Instance per second" % (
             (batch_size * iter + 2) / (end - begin))
 
-        last_loss = numpy.array(last_loss)
+        last_loss = np.array(last_loss)
 
         print first_loss, last_loss
         # self.assertGreater(first_loss[0], last_loss[0])
@@ -284,8 +284,8 @@ def check_simple_fc_convergence(self):
         self.check_network_convergence(simple_fc_net)
         self.check_network_convergence(simple_fc_net, allow_op_delay=True)
 
-        img = numpy.zeros(shape=[32, 784], dtype='float32')
-        label = numpy.ones(shape=[32, 1], dtype='int64')
+        img = np.zeros(shape=[32, 784], dtype='float32')
+        label = np.ones(shape=[32, 1], dtype='int64')
         self.check_network_convergence(
             simple_fc_net, feed_dict={"image": img,
                                       "label": label})
@@ -294,8 +294,8 @@ def test_simple_fc(self):
         self.check_simple_fc_convergence()
 
     def check_simple_fc_parallel_accuracy(self):
-        img = numpy.zeros(shape=[32, 784], dtype='float32')
-        label = numpy.ones(shape=[32, 1], dtype='int64')
+        img = np.zeros(shape=[32, 784], dtype='float32')
+        label = np.ones(shape=[32, 1], dtype='int64')
         single_first_loss, single_last_loss = self.check_network_convergence(
             method=simple_fc_net,
             seed=1000,
@@ -319,8 +319,8 @@ def test_simple_fc_parallel_accuracy(self):
 
     def check_batchnorm_fc_convergence(self):
         self.check_network_convergence(fc_with_batchnorm)
-        img = numpy.zeros(shape=[32, 784], dtype='float32')
-        label = numpy.ones(shape=[32, 1], dtype='int64')
+        img = np.zeros(shape=[32, 784], dtype='float32')
+        label = np.ones(shape=[32, 1], dtype='int64')
         self.check_network_convergence(
             fc_with_batchnorm, feed_dict={"image": img,
                                           "label": label})
@@ -404,9 +404,6 @@ class ModelHyperParams(object):
     dropout = 0.1
 
 
-import numpy as np
-
-
 def prepare_batch_input(insts, src_pad_idx, trg_pad_idx, n_head):
     """
     Pad the instances to the max sequence length in batch, and generate the
@@ -533,9 +530,8 @@ def check_network_convergence(self):
         opt.minimize(loss)
 
         batch_size = 32
-        image = numpy.random.normal(size=(batch_size,
-                                          784)).astype('float32')
-        label = numpy.random.randint(0, 10, (batch_size, 1), dtype="int64")
+        image = np.random.normal(size=(batch_size, 784)).astype('float32')
+        label = np.random.randint(0, 10, (batch_size, 1), dtype="int64")
 
         place = fluid.CUDAPlace(0)
         exe = fluid.Executor(place)
@@ -552,12 +548,12 @@ def check_network_convergence(self):
 
         for i in xrange(5):
             test_loss, = test_exe.run([loss.name], feed=feed_dict)
-            test_loss = numpy.array(test_loss)
+            test_loss = np.array(test_loss)
 
             train_loss, = train_exe.run([loss.name], feed=feed_dict)
-            train_loss = numpy.array(train_loss)
+            train_loss = np.array(train_loss)
             self.assertTrue(
-                numpy.allclose(
+                np.allclose(
                     train_loss, test_loss, atol=1e-8),
                 "Train loss: " + str(train_loss) + "\n Test loss:" +
                 str(test_loss))
@@ -712,7 +708,7 @@ def check_network_convergence(self, is_sparse):
         data = train_data()
         for i in xrange(10):
             cur_batch = next(data)
-            print map(numpy.array,
+            print map(np.array,
                       pe.run(feed=feeder.feed(cur_batch),
                              fetch_list=[avg_cost.name]))[0]
 
@@ -723,7 +719,7 @@ def test_update_dense_parameter(self):
         self.check_network_convergence(is_sparse=False)
 
 
-# test fetch op
+# test fetch all the variables of global_block
 
 import paddle.dataset.flowers as flowers
 
@@ -763,7 +759,8 @@ def parallel_exe(self, train_inputs, seed):
         opt.minimize(loss)
 
         # TODO(zcd): I found that onece the memory optimizer is open,
-        # parallel_exe doesn't fetch some variable, such as conv2d_0.b_0@GRAD, conv2d_1.b_0@GRAD.
+        # parallel_exe doesn't fetch some variable, such as conv2d_0.b_0@GRAD,
+        # conv2d_1.b_0@GRAD. Those variables should not be pruned.
         # fluid.memory_optimize(main)
 
         place = fluid.CUDAPlace(0)
@@ -775,16 +772,15 @@ def parallel_exe(self, train_inputs, seed):
             use_cuda=True, loss_name=loss.name, main_program=main)
 
         fetch_list = []
-        for data in train_inputs:
-            all_vars = main.global_block().vars
-            for k, v in all_vars.iteritems():
-                if v.persistable and 'velocity' not in k:
-                    fetch_list.append(k)
+        all_vars = main.global_block().vars
+        for k, v in all_vars.iteritems():
+            if 'velocity' not in k:
+                fetch_list.append(k)
 
+        for data in train_inputs:
             ret = pe.run(fetch_list, feed=feeder.feed(data))
-            result = {}
             for i in range(len(fetch_list)):
-                result[fetch_list[i]] = np.sum(ret[i])
+                print("%s - %s" % (fetch_list[i], np.sum(ret[i])))
 
     def test_update_sparse_parameter(self):
         tst_reader = paddle.batch(flowers.test(use_xmap=False), batch_size=16)
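The reworked tail of this test now builds the fetch list once, from every variable in the main program's global block (skipping the Momentum optimizer's velocity accumulators), and prints the summed value of each fetched variable per batch rather than accumulating them in a local dict. A self-contained sketch of the same pattern follows; it assumes the legacy fluid API and substitutes a hypothetical toy network for the test's convolutional model.

import numpy as np
import paddle.fluid as fluid

image = fluid.layers.data(name='image', shape=[784], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
prediction = fluid.layers.fc(input=image, size=10, act='softmax')
cost = fluid.layers.cross_entropy(input=prediction, label=label)
loss = fluid.layers.mean(cost)
fluid.optimizer.Momentum(learning_rate=0.01, momentum=0.9).minimize(loss)

place = fluid.CUDAPlace(0)
fluid.Executor(place).run(fluid.default_startup_program())

main = fluid.default_main_program()
# Fetch every named variable of the global block except the optimizer's
# velocity accumulators, mirroring the test's filter.
fetch_list = [k for k in main.global_block().vars if 'velocity' not in k]

pe = fluid.ParallelExecutor(use_cuda=True, loss_name=loss.name, main_program=main)
feed_dict = {
    'image': np.random.random([32, 784]).astype('float32'),
    'label': np.random.randint(0, 10, [32, 1]).astype('int64'),
}
ret = pe.run(fetch_list, feed=feed_dict)
for name, value in zip(fetch_list, ret):
    print("%s - %s" % (name, np.sum(value)))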
