Skip to content

Commit 521086d

Browse files
authored
Make federated client more robust (dmlc#8351)
1 parent 5647fc6 commit 521086d

File tree

1 file changed

+8
-2
lines changed

1 file changed

+8
-2
lines changed

plugin/federated/federated_client.h

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -28,8 +28,11 @@ class FederatedClient {
2828
options.pem_cert_chain = client_cert;
2929
grpc::ChannelArguments args;
3030
args.SetMaxReceiveMessageSize(std::numeric_limits<int>::max());
31-
return Federated::NewStub(
32-
grpc::CreateCustomChannel(server_address, grpc::SslCredentials(options), args));
31+
auto channel =
32+
grpc::CreateCustomChannel(server_address, grpc::SslCredentials(options), args);
33+
channel->WaitForConnected(
34+
gpr_time_add(gpr_now(GPR_CLOCK_REALTIME), gpr_time_from_seconds(60, GPR_TIMESPAN)));
35+
return Federated::NewStub(channel);
3336
}()},
3437
rank_{rank} {}
3538

@@ -51,6 +54,7 @@ class FederatedClient {
5154

5255
AllgatherReply reply;
5356
grpc::ClientContext context;
57+
context.set_wait_for_ready(true);
5458
grpc::Status status = stub_->Allgather(&context, request, &reply);
5559

5660
if (status.ok()) {
@@ -72,6 +76,7 @@ class FederatedClient {
7276

7377
AllreduceReply reply;
7478
grpc::ClientContext context;
79+
context.set_wait_for_ready(true);
7580
grpc::Status status = stub_->Allreduce(&context, request, &reply);
7681

7782
if (status.ok()) {
@@ -91,6 +96,7 @@ class FederatedClient {
9196

9297
BroadcastReply reply;
9398
grpc::ClientContext context;
99+
context.set_wait_for_ready(true);
94100
grpc::Status status = stub_->Broadcast(&context, request, &reply);
95101

96102
if (status.ok()) {

0 commit comments

Comments
 (0)