Skip to content

Commit d3ed070

Browse files
committed
test=develop
1 parent fb6201e commit d3ed070

File tree

2 files changed

+5
-40
lines changed

2 files changed

+5
-40
lines changed

paddle/fluid/framework/parallel_executor.cc

Lines changed: 5 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -64,8 +64,6 @@ ParallelExecutor::ParallelExecutor(
6464
const ExecutionStrategy &exec_strategy, const BuildStrategy &build_strategy,
6565
size_t num_trainers, size_t trainer_id)
6666
: member_(new ParallelExecutorPrivate(places)) {
67-
is_alive_.test_and_set();
68-
6967
member_->global_scope_ = scope;
7068
member_->use_cuda_ = exec_strategy.use_cuda_;
7169
member_->use_all_reduce_ =
@@ -248,15 +246,6 @@ void ParallelExecutor::BCastParamsToDevices(
248246

249247
void ParallelExecutor::Run(const std::vector<std::string> &fetch_tensors,
250248
const std::string &fetched_var_name) {
251-
// If ParallelExecutor has been destructed
252-
// just return
253-
if (!is_alive_.test_and_set()) return;
254-
255-
// If ParallelExecutor is running
256-
if (is_running_.test_and_set()) {
257-
PADDLE_THROW("The previous ParallelExecutor::Run() has not stopped");
258-
}
259-
260249
platform::RecordBlock b(0);
261250
#ifdef PADDLE_WITH_CUDA
262251
if (!gcs_.empty()) {
@@ -270,17 +259,9 @@ void ParallelExecutor::Run(const std::vector<std::string> &fetch_tensors,
270259
}
271260
}
272261
#endif
273-
try {
274-
auto fetch_data = member_->executor_->Run(fetch_tensors);
275-
*member_->global_scope_->Var(fetched_var_name)
276-
->GetMutable<FeedFetchList>() = fetch_data;
277-
is_running_.clear();
278-
} catch (...) {
279-
is_running_.clear();
280-
if (is_alive_.test_and_set()) {
281-
std::rethrow_exception(std::current_exception());
282-
}
283-
}
262+
auto fetch_data = member_->executor_->Run(fetch_tensors);
263+
*member_->global_scope_->Var(fetched_var_name)->GetMutable<FeedFetchList>() =
264+
fetch_data;
284265
}
285266

286267
void ParallelExecutor::FeedTensorsIntoLocalScopes(
@@ -318,7 +299,6 @@ void ParallelExecutor::FeedAndSplitTensorIntoLocalScopes(
318299
}
319300

320301
ParallelExecutor::~ParallelExecutor() {
321-
is_alive_.clear();
322302
if (member_->own_local_scope_) {
323303
for (size_t i = 1; i < member_->local_scopes_.size(); ++i) {
324304
Scope *local_scope = member_->local_scopes_[i];
@@ -328,10 +308,8 @@ ParallelExecutor::~ParallelExecutor() {
328308
}
329309
}
330310

331-
while (is_running_.test_and_set()) {
332-
// wait until all threads have been stopped
333-
}
334-
311+
// member_ must be destructed before gcs_ since the destructor of
312+
// ReferenceCountOpHandle uses raw pointers of gcs_ inside.
335313
member_.reset();
336314
}
337315

paddle/fluid/framework/parallel_executor.h

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -77,19 +77,6 @@ class ParallelExecutor {
7777

7878
std::unique_ptr<ParallelExecutorPrivate> member_;
7979

80-
// FIXME(zjl): HOT-FIX
81-
// A flag to indicate whether ParallelExecutor is destructed.
82-
// In Python side, when users interrupt the process manually, such as
83-
// keyboard interrupt, ParallelExecutor may be destructed before Run() ends.
84-
// Thus, disturbing exception messages would occur when interrupted.
85-
// If is_alive_ is false, we would discard the last exception thrown by Run().
86-
// Since std::atomic_flag is always lock-free and faster than
87-
// std::atomic<bool>, we choose std::atomic_flag to be the flag here.
88-
std::atomic_flag is_alive_ = ATOMIC_FLAG_INIT;
89-
90-
// A flag to indicate whether ParallelExecutor is running.
91-
std::atomic_flag is_running_ = ATOMIC_FLAG_INIT;
92-
9380
#ifdef PADDLE_WITH_CUDA
9481
// ref_cnts_ is only initialized when ParallelExecutor constructs, and then
9582
// keeps unchanged

0 commit comments

Comments
 (0)