Commit c28beb8

test(Pe): add dry run tests for pe (#14254)
Dry run tests skip `Op.Run` and only perform job scheduling. This helps to analyze deadlocks in the ParallelExecutor (PE). test=develop
1 parent 8013293
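A minimal usage sketch (not part of the commit; it mirrors the new unit test and assumes the `_dry_run` property exposed on `ExecutionStrategy` in pybind.cc below). A tiny network is built, then the ParallelExecutor is driven in dry-run mode, which schedules the ops but never calls `Op.Run`:

import paddle.fluid as fluid

# Build a small program so the ParallelExecutor has something to schedule.
main_prog = fluid.Program()
startup_prog = fluid.Program()
with fluid.program_guard(main_prog, startup_prog):
    img = fluid.layers.data(name='img', shape=[784], dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    prediction = fluid.layers.fc(input=img, size=10, act='softmax')
    loss = fluid.layers.mean(
        fluid.layers.cross_entropy(input=prediction, label=label))
    fluid.optimizer.Adam().minimize(loss)

fluid.Executor(fluid.CPUPlace()).run(startup_prog)

exec_strategy = fluid.ExecutionStrategy()
exec_strategy._dry_run = True  # schedule ops, but skip Op.Run
pe = fluid.ParallelExecutor(
    use_cuda=False,
    loss_name=loss.name,
    main_program=main_prog,
    exec_strategy=exec_strategy)
pe.run([])  # no fetch targets: exercises job scheduling only

Because no op actually runs, no input data needs to be fed; the run only exercises the scheduler, which is what makes it useful for spotting deadlocks.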

8 files changed (+108 lines, -16 lines)

paddle/fluid/framework/details/execution_strategy.h

Lines changed: 2 additions & 0 deletions

@@ -13,6 +13,7 @@
 // limitations under the License.
 
 #pragma once
+#include <cstddef>  // for size_t
 
 namespace paddle {
 namespace framework {
@@ -26,6 +27,7 @@ struct ExecutionStrategy {
   bool allow_op_delay_{false};
   size_t num_iteration_per_drop_scope_{100};
   ExecutorType type_{kDefault};
+  bool dry_run_{false};
 };
 
 }  // namespace details

paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc

Lines changed: 3 additions & 1 deletion

@@ -128,7 +128,9 @@ void FastThreadedSSAGraphExecutor::RunOpAsync(
     size_t complete = 0;
     while (op_to_run != nullptr) {
       try {
-        op_to_run->Run(strategy_.use_cuda_);
+        if (LIKELY(!strategy_.dry_run_)) {
+          op_to_run->Run(strategy_.use_cuda_);
+        }
         ++complete;
       } catch (...) {
         exception_.Catch(std::current_exception());

paddle/fluid/framework/details/threaded_ssa_graph_executor.cc

Lines changed: 3 additions & 1 deletion

@@ -211,7 +211,9 @@ void ThreadedSSAGraphExecutor::RunOp(
       if (VLOG_IS_ON(10)) {
         VLOG(10) << op << " " << op->Name() << " : " << op->DebugString();
       }
-      op->Run(strategy_.use_cuda_);
+      if (LIKELY(!strategy_.dry_run_)) {
+        op->Run(strategy_.use_cuda_);
+      }
       VLOG(10) << op << " " << op->Name() << " Done ";
       running_ops_--;
       ready_var_q->Extend(op->Outputs());

paddle/fluid/framework/details/threaded_ssa_graph_executor.h

Lines changed: 1 addition & 1 deletion

@@ -48,7 +48,7 @@ class ThreadedSSAGraphExecutor : public SSAGraphExecutor {
   // Use topological sort algorithm
   FeedFetchList Run(const std::vector<std::string> &fetch_tensors) override;
 
-  ~ThreadedSSAGraphExecutor() {}
+  ~ThreadedSSAGraphExecutor() final = default;
 
  private:
  void RunOp(const std::shared_ptr<BlockingQueue<VarHandleBase *>> &ready_var_q,

paddle/fluid/framework/parallel_executor.cc

Lines changed: 12 additions & 11 deletions

@@ -38,9 +38,20 @@ class ParallelExecutorPrivate {
   explicit ParallelExecutorPrivate(const std::vector<platform::Place> &places)
       : places_(places) {}
 
+  ~ParallelExecutorPrivate() {
+    if (own_local_scope_) {
+      for (size_t i = 1; i < local_scopes_.size(); ++i) {
+        // Skip the first scope, since it is the global scope.
+        Scope *local_scope = local_scopes_[i];
+        if (global_scope_->HasKid(local_scope)) {
+          global_scope_->DeleteScope(local_scope);
+        }
+      }
+    }
+  }
   std::vector<platform::Place> places_;
   std::vector<Scope *> local_scopes_;
-  Scope *global_scope_;
+  Scope *global_scope_;  // not owned
   std::unique_ptr<details::SSAGraphExecutor> executor_;
 
 #ifdef PADDLE_WITH_CUDA
@@ -306,16 +317,6 @@ ParallelExecutor::~ParallelExecutor() {
   for (auto &p : member_->places_) {
     platform::DeviceContextPool::Instance().Get(p)->Wait();
   }
-
-  if (member_->own_local_scope_) {
-    for (size_t i = 1; i < member_->local_scopes_.size(); ++i) {
-      Scope *local_scope = member_->local_scopes_[i];
-      if (member_->global_scope_->HasKid(local_scope)) {
-        member_->global_scope_->DeleteScope(local_scope);
-      }
-    }
-  }
-
   // member_ must be destructed before gcs_ since the destructor of
   // ReferenceCountOpHandle use raw pointers of gcs_ inside.
   member_.reset();

paddle/fluid/pybind/pybind.cc

Lines changed: 6 additions & 1 deletion

@@ -742,7 +742,12 @@ All parameter, weight, gradient are variables in Paddle.
           will clean up the temp variables at the end of the current iteration.
         2. In some NLP model, it may cause the GPU memory is insufficient,
           in this case, you should reduce `num_iteration_per_drop_scope`.
-      )DOC");
+      )DOC")
+      .def_property("_dry_run",
+                    [](const ExecutionStrategy &self) { return self.dry_run_; },
+                    [](ExecutionStrategy &self, bool dry_run) {
+                      self.dry_run_ = dry_run;
+                    });
 
   exec_strategy.def_property(
       "use_experimental_executor",

python/paddle/fluid/layers/io.py

Lines changed: 1 addition & 1 deletion

@@ -60,7 +60,7 @@ def data(name,
            For example if shape=[1], the resulting shape is [-1, 1].
          2. If shape contains -1, such as shape=[1, -1],
            append_batch_size will be enforced to be be False (ineffective).
-       dtype(int|float): The type of data : float32, float_16, int etc
+       dtype(basestring): The type of data : float32, float_16, int etc
        type(VarType): The output type. By default it is LOD_TENSOR.
        lod_level(int): The LoD Level. 0 means the input data is not a sequence.
        stop_gradient(bool): A boolean that mentions whether gradient should flow.

Lines changed: 80 additions & 0 deletions

@@ -0,0 +1,80 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle.fluid as fluid
+import unittest
+import logging
+import six
+
+
+class TestBase(unittest.TestCase):
+    def main(self,
+             network_func,
+             iter=100,
+             iter_per_pe=100,
+             use_gpu=True,
+             use_experimental_executor=False):
+        if use_gpu and not fluid.core.is_compiled_with_cuda():
+            logging.warning(
+                "Paddle is not compiled with CUDA, skip GPU unittests")
+            return
+
+        main_prog = fluid.Program()
+        startup_prog = fluid.Program()
+        scope = fluid.Scope()
+        with fluid.program_guard(main_prog, startup_prog):
+            with fluid.scope_guard(scope):
+                loss = network_func()
+                fluid.Executor(
+                    fluid.CUDAPlace(0)
+                    if use_gpu else fluid.CPUPlace()).run(startup_prog)
+
+                for _ in six.moves.xrange(iter):
+                    exe_strategy = fluid.ExecutionStrategy()
+                    exe_strategy._dry_run = True
+                    exe_strategy.use_experimental_executor = use_experimental_executor
+                    pe = fluid.ParallelExecutor(
+                        use_cuda=True,
+                        loss_name=loss.name,
+                        main_program=main_prog,
+                        exec_strategy=exe_strategy)
+                    for _ in six.moves.xrange(iter_per_pe):
+                        pe.run([])
+
+
+class TestMNISTDryRun(TestBase):
+    def test_mnist_dry_run(self):
+        for use_gpu in (False, True):
+            for use_experimental_executor in (False, True):
+                self.main(
+                    network_func=TestMNISTDryRun.network_func,
+                    use_gpu=use_gpu,
+                    use_experimental_executor=use_experimental_executor)
+
+    @staticmethod
+    def network_func():
+        img = fluid.layers.data(name='img', shape=[784], dtype='float32')
+        label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+        hidden = img
+        for _ in six.moves.xrange(10):
+            hidden = fluid.layers.fc(input=img, size=200, act='tanh')
+        prediction = fluid.layers.fc(input=hidden, size=10, act='softmax')
+        loss = fluid.layers.cross_entropy(input=prediction, label=label)
+        avg_loss = fluid.layers.mean(loss)
+        fluid.optimizer.Adam().minimize(avg_loss)
+        return avg_loss
+
+
+if __name__ == '__main__':
+    unittest.main()
