@@ -94,16 +94,18 @@ Status RPCClient::Connect(const std::string& rpc_endpoint) {
9494Status RPCClient::Connect (const std::string& rpc_endpoint,
9595 std::string const & username,
9696 std::string const & password,
97- const std::string& rdma_endpoint) {
97+ const std::string& rdma_endpoint,
98+ std::string src_rdma_ednpoint) {
9899 return this ->Connect (rpc_endpoint, RootSessionID (), username, password,
99- rdma_endpoint);
100+ rdma_endpoint, src_rdma_ednpoint );
100101}
101102
102103Status RPCClient::Connect (const std::string& rpc_endpoint,
103104 const SessionID session_id,
104105 std::string const & username,
105106 std::string const & password,
106- const std::string& rdma_endpoint) {
107+ const std::string& rdma_endpoint,
108+ std::string src_rdma_ednpoint) {
107109 size_t pos = rpc_endpoint.find (" :" );
108110 std::string host, port;
109111 if (pos == std::string::npos) {
@@ -125,28 +127,32 @@ Status RPCClient::Connect(const std::string& rpc_endpoint,
125127
126128 return this ->Connect (host, static_cast <uint32_t >(std::stoul (port)),
127129 session_id, username, password, rdma_host,
128- static_cast <uint32_t >(std::stoul (rdma_port)));
130+ static_cast <uint32_t >(std::stoul (rdma_port)),
131+ src_rdma_ednpoint);
129132}
130133
131134Status RPCClient::Connect (const std::string& host, uint32_t port,
132- const std::string& rdma_host, uint32_t rdma_port) {
135+ const std::string& rdma_host, uint32_t rdma_port,
136+ std::string src_rdma_ednpoint) {
133137 return this ->Connect (host, port, RootSessionID (), " " , " " , rdma_host,
134- rdma_port);
138+ rdma_port, src_rdma_ednpoint );
135139}
136140
137141Status RPCClient::Connect (const std::string& host, uint32_t port,
138142 std::string const & username,
139143 std::string const & password,
140- const std::string& rdma_host, uint32_t rdma_port) {
144+ const std::string& rdma_host, uint32_t rdma_port,
145+ std::string src_rdma_ednpoint) {
141146 return this ->Connect (host, port, RootSessionID (), username, password,
142- rdma_host, rdma_port);
147+ rdma_host, rdma_port, src_rdma_ednpoint );
143148}
144149
145150Status RPCClient::Connect (const std::string& host, uint32_t port,
146151 const SessionID session_id,
147152 std::string const & username,
148153 std::string const & password,
149- const std::string& rdma_host, uint32_t rdma_port) {
154+ const std::string& rdma_host, uint32_t rdma_port,
155+ std::string src_rdma_ednpoint) {
150156 std::lock_guard<std::recursive_mutex> guard (client_mutex_);
151157 std::string rpc_endpoint = host + " :" + std::to_string (port);
152158 RETURN_ON_ASSERT (!connected_ || rpc_endpoint == rpc_endpoint_);
@@ -183,7 +189,8 @@ Status RPCClient::Connect(const std::string& host, uint32_t port,
183189 instance_id_ = UnspecifiedInstanceID () - 1 ;
184190
185191 if (rdma_host.length () > 0 ) {
186- Status status = ConnectRDMA (rdma_host, rdma_port);
192+ src_rdma_endpoint_ = src_rdma_ednpoint;
193+ Status status = ConnectRDMA (rdma_host, rdma_port, src_rdma_ednpoint);
187194 if (status.ok ()) {
188195 rdma_endpoint_ = rdma_host + " :" + std::to_string (rdma_port);
189196 std::cout << " Connected to RPC server: " << rpc_endpoint
@@ -192,33 +199,38 @@ Status RPCClient::Connect(const std::string& host, uint32_t port,
192199 } else {
193200 std::cout << " Connect RDMA server failed! Fall back to RPC mode. Error:"
194201 << status.message () << std::endl;
202+ std::cout << " Failed src_rdma_ednpoint: " << src_rdma_ednpoint
203+ << std::endl;
195204 }
196205 }
197206
198207 return Status::OK ();
199208}
200209
201- Status RPCClient::ConnectRDMA (const std::string& rdma_host,
202- uint32_t rdma_port ) {
210+ Status RPCClient::ConnectRDMA (const std::string& rdma_host, uint32_t rdma_port,
211+ std::string src_rdma_endpoint ) {
203212 if (this ->rdma_connected_ ) {
204213 return Status::OK ();
205214 }
206215
207216 RETURN_ON_ERROR (RDMAClientCreator::Create (this ->rdma_client_ , rdma_host,
208- static_cast <int >(rdma_port)));
217+ static_cast <int >(rdma_port),
218+ src_rdma_endpoint));
209219
210220 int retry = 0 ;
211221 do {
212- if (this ->rdma_client_ ->Connect ().ok ()) {
222+ Status status = this ->rdma_client_ ->Connect ();
223+ if (status.ok ()) {
213224 break ;
214225 }
215226 if (retry == 10 ) {
216227 return Status::Invalid (" Failed to connect to RDMA server." );
217228 }
218229 retry++;
219230 usleep (300 * 1000 );
220- std::cout << " Connect rdma server failed! retry: " << retry << " times."
221- << std::endl;
231+ std::cout << " Connect rdma server failed! Error:" + status.message () +
232+ " retry: "
233+ << retry << " times." << std::endl;
222234 } while (true );
223235 this ->rdma_connected_ = true ;
224236 return Status::OK ();
@@ -272,6 +284,9 @@ Status RPCClient::RDMAReleaseMemInfo(RegisterMemInfo& remote_info) {
272284
273285Status RPCClient::StopRDMA () {
274286 if (!rdma_connected_) {
287+ RETURN_ON_ERROR (
288+ RDMAClientCreator::Release (RDMAClientCreator::buildConnectionKey (
289+ rdma_endpoint_, src_rdma_endpoint_)));
275290 return Status::OK ();
276291 }
277292 rdma_connected_ = false ;
@@ -285,7 +300,9 @@ Status RPCClient::StopRDMA() {
285300
286301 RETURN_ON_ERROR (rdma_client_->Stop ());
287302 RETURN_ON_ERROR (rdma_client_->Close ());
288- RETURN_ON_ERROR (RDMAClientCreator::Release (rdma_endpoint_));
303+ RETURN_ON_ERROR (
304+ RDMAClientCreator::Release (RDMAClientCreator::buildConnectionKey (
305+ rdma_endpoint_, src_rdma_endpoint_)));
289306
290307 return Status::OK ();
291308}
0 commit comments