Skip to content

Commit 3445577

Browse files
committed
Avoid race conditions during BTP TCP handshake.
In some rare cases when a process receives the connect ack while locally updating the peer endpoint structure, we could drop the incomming connect ack due to the fact that the send handler is protected with a try lock (on the endpoint) and our initial send event was not persistent. Making the send event persistent solves all issues.
1 parent 6e6ed62 commit 3445577

File tree

1 file changed

+10
-19
lines changed

1 file changed

+10
-19
lines changed

opal/mca/btl/tcp/btl_tcp_endpoint.c

Lines changed: 10 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -300,18 +300,17 @@ static inline void mca_btl_tcp_endpoint_event_init(mca_btl_base_endpoint_t* btl_
300300

301301
opal_event_set(mca_btl_tcp_event_base, &btl_endpoint->endpoint_recv_event,
302302
btl_endpoint->endpoint_sd,
303-
OPAL_EV_READ|OPAL_EV_PERSIST,
303+
OPAL_EV_READ | OPAL_EV_PERSIST,
304304
mca_btl_tcp_endpoint_recv_handler,
305305
btl_endpoint );
306306
/**
307-
* The send event should be non persistent until the endpoint is
308-
* completely connected. This means, when the event is created it
309-
* will be fired only once, and when the endpoint is marked as
310-
* CONNECTED the event should be recreated with the correct flags.
307+
* In the multi-threaded case, the send event must be persistent in order
308+
* to avoid missing the connection notification in send_handler due to
309+
* a local handling of the peer process (which holds the lock).
311310
*/
312311
opal_event_set(mca_btl_tcp_event_base, &btl_endpoint->endpoint_send_event,
313312
btl_endpoint->endpoint_sd,
314-
OPAL_EV_WRITE,
313+
OPAL_EV_WRITE | OPAL_EV_PERSIST,
315314
mca_btl_tcp_endpoint_send_handler,
316315
btl_endpoint);
317316
}
@@ -558,13 +557,6 @@ static void mca_btl_tcp_endpoint_connected(mca_btl_base_endpoint_t* btl_endpoint
558557
btl_endpoint->endpoint_retries = 0;
559558
MCA_BTL_TCP_ENDPOINT_DUMP(1, btl_endpoint, true, "READY [endpoint_connected]");
560559

561-
/* Create the send event in a persistent manner. */
562-
opal_event_set(mca_btl_tcp_event_base, &btl_endpoint->endpoint_send_event,
563-
btl_endpoint->endpoint_sd,
564-
OPAL_EV_WRITE | OPAL_EV_PERSIST,
565-
mca_btl_tcp_endpoint_send_handler,
566-
btl_endpoint );
567-
568560
if(opal_list_get_size(&btl_endpoint->endpoint_frags) > 0) {
569561
if(NULL == btl_endpoint->endpoint_send_frag)
570562
btl_endpoint->endpoint_send_frag = (mca_btl_tcp_frag_t*)
@@ -779,6 +771,10 @@ static void mca_btl_tcp_endpoint_complete_connect(mca_btl_base_endpoint_t* btl_e
779771
opal_socklen_t so_length = sizeof(so_error);
780772
struct sockaddr_storage endpoint_addr;
781773

774+
/* Delete the send event notification, as the next step is waiting for the ack
775+
* from the peer. Once this ack is received we will deal with the send notification
776+
* accordingly.
777+
*/
782778
opal_event_del(&btl_endpoint->endpoint_send_event);
783779

784780
mca_btl_tcp_proc_tosocks(btl_endpoint->endpoint_addr, &endpoint_addr);
@@ -802,12 +798,7 @@ static void mca_btl_tcp_endpoint_complete_connect(mca_btl_base_endpoint_t* btl_e
802798
return;
803799
}
804800

805-
/* Do not unregister from receiving send event notifications, instead
806-
* leave the event to trigger once more, and then it will get automatically
807-
* deleted as no send fragments are available.
808-
*/
809-
810-
if(mca_btl_tcp_endpoint_send_connect_ack(btl_endpoint) == OPAL_SUCCESS) {
801+
if(mca_btl_tcp_endpoint_send_connect_ack(btl_endpoint) == OPAL_SUCCESS) {
811802
btl_endpoint->endpoint_state = MCA_BTL_TCP_CONNECT_ACK;
812803
opal_event_add(&btl_endpoint->endpoint_recv_event, 0);
813804
MCA_BTL_TCP_ENDPOINT_DUMP(10, btl_endpoint, false, "event_add(recv) [complete_connect]");

0 commit comments

Comments
 (0)