Skip to content

Commit c0f94ae

Browse files
authored
fix bug with heart beat, test=develop (#20658)
1 parent 194f3dc commit c0f94ae

File tree

2 files changed

+6
-9
lines changed

2 files changed

+6
-9
lines changed

paddle/fluid/operators/distributed/heart_beat_monitor.h

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -84,14 +84,7 @@ class HeartBeatMonitor {
8484
be_monitored_var);
8585
}
8686

87-
static HeartBeatMonitor* GetInstance() {
88-
if (monitor_ == nullptr) {
89-
PADDLE_THROW(
90-
"HeartBeatMonitor is not inited, call "
91-
"HeartBeatMonitor::Init first");
92-
}
93-
return monitor_.get();
94-
}
87+
static HeartBeatMonitor* GetInstance() { return monitor_.get(); }
9588

9689
void Stop() {
9790
running_ = false;

paddle/fluid/operators/distributed/request_handler_impl.cc

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,11 @@ bool RequestSendHandler::Handle(const std::string& varname,
5353
rpc_server_->IncreaseBatchBarrier(kRequestSend);
5454
} else if (varname == COMPLETE_MESSAGE) {
5555
VLOG(3) << "sync: recv complete message";
56-
HeartBeatMonitor::GetInstance()->Update(trainer_id, "", COMPLETED);
56+
57+
if (HeartBeatMonitor::GetInstance() != nullptr) {
58+
HeartBeatMonitor::GetInstance()->Update(trainer_id, "", COMPLETED);
59+
}
60+
5761
rpc_server_->Complete();
5862
} else {
5963
// Async

0 commit comments

Comments
 (0)