Skip to content

Commit 201f857

Browse files
author
Ralph Castain
committed
Ensure we retain the peer object until we are done with it. Also detect when the socket has closed due to a lost connection, and cleanly release the message event in that case.
Signed-off-by: Ralph Castain <[email protected]>
1 parent ba47f73 commit 201f857

File tree

4 files changed

+28
-8
lines changed

4 files changed

+28
-8
lines changed

opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_frame.c

Lines changed: 11 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -222,6 +222,16 @@ PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_listener_t,
222222
pmix_list_item_t,
223223
lcon, ldes);
224224

225+
/* Constructor for pmix_ptl_queue_t (passed to PMIX_CLASS_INSTANCE below):
 * start with no peer attached, so qdes() can tell whether there is a
 * peer reference to release. */
static void qcon(pmix_ptl_queue_t *p)
226+
{
227+
p->peer = NULL;
228+
}
229+
/* Destructor for pmix_ptl_queue_t (passed to PMIX_CLASS_INSTANCE below):
 * if a peer was attached to this queue entry, release the entry's
 * reference on it. */
static void qdes(pmix_ptl_queue_t *p)
230+
{
231+
if (NULL != p->peer) {
232+
PMIX_RELEASE(p->peer);
233+
}
234+
}
225235
PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_ptl_queue_t,
226236
pmix_object_t,
227-
NULL, NULL);
237+
qcon, qdes);

opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_sendrecv.c

Lines changed: 8 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -470,18 +470,20 @@ void pmix_ptl_base_send(int sd, short args, void *cbdata)
470470
{
471471
pmix_ptl_queue_t *queue = (pmix_ptl_queue_t*)cbdata;
472472
pmix_ptl_send_t *snd;
473+
474+
if (NULL == queue->peer || queue->peer->sd < 0 ||
475+
NULL == queue->peer->info || NULL == queue->peer->info->nptr) {
476+
/* this peer has lost connection */
477+
PMIX_RELEASE(queue);
478+
return;
479+
}
480+
473481
pmix_output_verbose(2, pmix_globals.debug_output,
474482
"[%s:%d] send to %s:%d on tag %d",
475483
__FILE__, __LINE__,
476484
(queue->peer)->info->nptr->nspace,
477485
(queue->peer)->info->rank, (queue->tag));
478486

479-
if (queue->peer->sd < 0) {
480-
/* this peer's socket has been closed */
481-
PMIX_RELEASE(queue);
482-
return;
483-
}
484-
485487
snd = PMIX_NEW(pmix_ptl_send_t);
486488
snd->hdr.pindex = htonl(pmix_globals.pindex);
487489
snd->hdr.tag = htonl(queue->tag);

opal/mca/pmix/pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp.c

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -335,9 +335,11 @@ static pmix_status_t send_recv(struct pmix_peer_t *peer,
335335
void *cbdata)
336336
{
337337
pmix_ptl_sr_t *ms;
338+
338339
pmix_output_verbose(5, pmix_globals.debug_output,
339340
"[%s:%d] post send to server",
340341
__FILE__, __LINE__);
342+
341343
ms = PMIX_NEW(pmix_ptl_sr_t);
342344
ms->peer = peer;
343345
ms->bfr = bfr;
@@ -354,11 +356,13 @@ static pmix_status_t send_oneway(struct pmix_peer_t *peer,
354356
pmix_ptl_tag_t tag)
355357
{
356358
pmix_ptl_queue_t *q;
359+
pmix_peer_t *pr = (pmix_peer_t*)peer;
357360

358361
/* we have to transfer this to an event for thread
359362
* safety as we need to post this message on the
360363
* peer's send queue */
361364
q = PMIX_NEW(pmix_ptl_queue_t);
365+
OBJ_RETAIN(pr);
362366
q->peer = peer;
363367
q->buf = bfr;
364368
q->tag = tag;

opal/mca/pmix/pmix2x/pmix/src/mca/ptl/usock/ptl_usock.c

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@
1313
* Copyright (c) 2011-2014 Cisco Systems, Inc. All rights reserved.
1414
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
1515
* reserved.
16-
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
16+
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
1717
* $COPYRIGHT$
1818
*
1919
* Additional copyrights may follow
@@ -189,9 +189,11 @@ static pmix_status_t send_recv(struct pmix_peer_t *peer,
189189
void *cbdata)
190190
{
191191
pmix_ptl_sr_t *ms;
192+
192193
pmix_output_verbose(5, pmix_globals.debug_output,
193194
"[%s:%d] post send to server",
194195
__FILE__, __LINE__);
196+
195197
ms = PMIX_NEW(pmix_ptl_sr_t);
196198
ms->peer = peer;
197199
ms->bfr = bfr;
@@ -208,11 +210,13 @@ static pmix_status_t send_oneway(struct pmix_peer_t *peer,
208210
pmix_ptl_tag_t tag)
209211
{
210212
pmix_ptl_queue_t *q;
213+
pmix_peer_t *pr = (pmix_peer_t*)peer;
211214

212215
/* we have to transfer this to an event for thread
213216
* safety as we need to post this message on the
214217
* peer's send queue */
215218
q = PMIX_NEW(pmix_ptl_queue_t);
219+
OBJ_RETAIN(pr);
216220
q->peer = peer;
217221
q->buf = bfr;
218222
q->tag = tag;

0 commit comments

Comments
 (0)