@@ -62,6 +62,9 @@ static void lost_connection(pmix_peer_t *peer, pmix_status_t err)
6262 pmix_regevents_info_t * reginfoptr , * regnext ;
6363 pmix_peer_events_info_t * pr , * pnext ;
6464 pmix_rank_info_t * info , * pinfo ;
65+ pmix_ptl_posted_recv_t * rcv ;
66+ pmix_buffer_t buf ;
67+ pmix_ptl_hdr_t hdr ;
6568
6669 /* stop all events */
6770 if (peer -> recv_ev_active ) {
@@ -143,6 +146,25 @@ static void lost_connection(pmix_peer_t *peer, pmix_status_t err)
143146 pmix_globals .connected = false;
144147 /* set the public error status */
145148 err = PMIX_ERR_LOST_CONNECTION_TO_SERVER ;
149+ /* it is possible that we have sendrecv's in progress where
150+ * we are waiting for a response to arrive. Since we have
151+ * lost connection to the server, that will never happen.
152+ * Thus, to preclude any chance of hanging, cycle thru
153+ * the list of posted recvs and complete any that are
154+ * the return call from a sendrecv - i.e., any that are
155+ * waiting on dynamic tags */
156+ PMIX_CONSTRUCT (& buf , pmix_buffer_t );
157+ hdr .nbytes = 0 ; // initialize the hdr to something safe
158+ PMIX_LIST_FOREACH (rcv , & pmix_ptl_globals .posted_recvs , pmix_ptl_posted_recv_t ) {
159+ if (PMIX_PTL_TAG_DYNAMIC <= rcv -> tag && UINT_MAX != rcv -> tag ) {
160+ if (NULL != rcv -> cbfunc ) {
161+ /* construct and load the buffer */
162+ hdr .tag = rcv -> tag ;
163+ rcv -> cbfunc (pmix_globals .mypeer , & hdr , & buf , rcv -> cbdata );
164+ }
165+ }
166+ }
167+ PMIX_DESTRUCT (& buf );
146168 }
147169 PMIX_REPORT_EVENT (err , _notify_complete );
148170}
0 commit comments