Skip to content

Commit d34dbf1

Browse files
Ralph Castain authored
Merge pull request #3942 from artpol84/dmdx_err_resp/master
orte/pmix/server: Fix direct modex response with error status
2 parents 0386c26 + 79c10c8 commit d34dbf1

File tree

1 file changed: +19 -11 lines changed

orte/orted/pmix/pmix_server.c

Lines changed: 19 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@
1414
* Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved.
1515
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
1616
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
17-
* Copyright (c) 2014 Mellanox Technologies, Inc.
17+
* Copyright (c) 2014-2017 Mellanox Technologies, Inc.
1818
* All rights reserved.
1919
* Copyright (c) 2014-2015 Research Organization for Information Science
2020
* and Technology (RIST). All rights reserved.
@@ -322,7 +322,7 @@ void pmix_server_finalize(void)
322322
}
323323

324324
static void send_error(int status, opal_process_name_t *idreq,
325-
orte_process_name_t *remote)
325+
orte_process_name_t *remote, int remote_room)
326326
{
327327
opal_buffer_t *reply;
328328
int rc;
@@ -331,21 +331,29 @@ static void send_error(int status, opal_process_name_t *idreq,
331331
/* pack the status */
332332
if (OPAL_SUCCESS != (rc = opal_dss.pack(reply, &status, 1, OPAL_INT))) {
333333
ORTE_ERROR_LOG(rc);
334-
OBJ_RELEASE(reply);
335-
return;
334+
goto error;
336335
}
337336
/* pack the id of the requested proc */
338337
if (OPAL_SUCCESS != (rc = opal_dss.pack(reply, idreq, 1, OPAL_NAME))) {
339338
ORTE_ERROR_LOG(rc);
340-
OBJ_RELEASE(reply);
341-
return;
339+
goto error;
340+
}
341+
342+
/* pack the remote daemon's request room number */
343+
if (OPAL_SUCCESS != (rc = opal_dss.pack(reply, &remote_room, 1, OPAL_INT))) {
344+
ORTE_ERROR_LOG(rc);
345+
goto error;
342346
}
347+
343348
/* send the response */
344349
orte_rml.send_buffer_nb(orte_mgmt_conduit,
345350
remote, reply,
346351
ORTE_RML_TAG_DIRECT_MODEX_RESP,
347352
orte_rml_send_callback, NULL);
348353
return;
354+
error:
355+
OBJ_RELEASE(reply);
356+
return;
349357
}
350358

351359
static void _mdxresp(int sd, short args, void *cbdata)
@@ -472,18 +480,18 @@ static void pmix_server_dmdx_recv(int status, orte_process_name_t* sender,
472480
if (OPAL_SUCCESS != (rc = opal_hotel_checkin(&orte_pmix_server_globals.reqs, req, &req->room_num))) {
473481
orte_show_help("help-orted.txt", "noroom", true, req->operation, orte_pmix_server_globals.num_rooms);
474482
OBJ_RELEASE(req);
475-
send_error(rc, &idreq, sender);
483+
send_error(rc, &idreq, sender, room_num);
476484
}
477485
return;
478486
}
479487
if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, name.vpid))) {
480488
/* this is truly an error, so notify the sender */
481-
send_error(ORTE_ERR_NOT_FOUND, &idreq, sender);
489+
send_error(ORTE_ERR_NOT_FOUND, &idreq, sender, room_num);
482490
return;
483491
}
484492
if (!ORTE_FLAG_TEST(proc, ORTE_PROC_FLAG_LOCAL)) {
485493
/* send back an error - they obviously have made a mistake */
486-
send_error(ORTE_ERR_NOT_FOUND, &idreq, sender);
494+
send_error(ORTE_ERR_NOT_FOUND, &idreq, sender, room_num);
487495
return;
488496
}
489497
/* track the request since the call down to the PMIx server
@@ -499,7 +507,7 @@ static void pmix_server_dmdx_recv(int status, orte_process_name_t* sender,
499507
if (OPAL_SUCCESS != (rc = opal_hotel_checkin(&orte_pmix_server_globals.reqs, req, &req->room_num))) {
500508
orte_show_help("help-orted.txt", "noroom", true, req->operation, orte_pmix_server_globals.num_rooms);
501509
OBJ_RELEASE(req);
502-
send_error(rc, &idreq, sender);
510+
send_error(rc, &idreq, sender, room_num);
503511
return;
504512
}
505513

@@ -508,7 +516,7 @@ static void pmix_server_dmdx_recv(int status, orte_process_name_t* sender,
508516
ORTE_ERROR_LOG(rc);
509517
opal_hotel_checkout(&orte_pmix_server_globals.reqs, req->room_num);
510518
OBJ_RELEASE(req);
511-
send_error(rc, &idreq, sender);
519+
send_error(rc, &idreq, sender, room_num);
512520
return;
513521
}
514522
return;

0 commit comments

Comments
 (0)