Skip to content

Commit 840d6c9

Browse files
author
Ralph Castain
authored
Merge pull request #3284 from rhc54/topic/hotel
Resolve the direct modex race condition.
2 parents 8d1369d + b7e9711 commit 840d6c9

File tree

4 files changed

+41
-5
lines changed

4 files changed

+41
-5
lines changed

orte/orted/help-orted.txt

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
# University of Stuttgart. All rights reserved.
1111
# Copyright (c) 2004-2005 The Regents of the University of California.
1212
# All rights reserved.
13-
# Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
13+
# Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
1414
# $COPYRIGHT$
1515
#
1616
# Additional copyrights may follow
@@ -60,3 +60,11 @@ info key:
6060
key: %s
6161

6262
The operation will continue, but may not behave completely as expected.
63+
#
64+
[timedout]
65+
A request has timed out and will therefore fail:
66+
67+
Operation: %s
68+
69+
Your job may terminate as a result of this problem. You may want to
70+
adjust the MCA parameter pmix_server_max_wait and try again.

orte/orted/pmix/pmix_server.c

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,8 @@ static void pmix_server_dmdx_resp(int status, orte_process_name_t* sender,
8383
opal_buffer_t *buffer,
8484
orte_rml_tag_t tg, void *cbdata);
8585

86+
#define ORTE_PMIX_SERVER_MIN_ROOMS 4096
87+
8688
pmix_server_globals_t orte_pmix_server_globals = {0};
8789

8890
static opal_pmix_server_module_t pmix_server = {
@@ -122,7 +124,7 @@ void pmix_server_register_params(void)
122124
orte_pmix_server_globals.verbosity);
123125
}
124126
/* specify the size of the hotel */
125-
orte_pmix_server_globals.num_rooms = 256;
127+
orte_pmix_server_globals.num_rooms = -1;
126128
(void) mca_base_var_register ("orte", "pmix", NULL, "server_max_reqs",
127129
"Maximum number of backlogged PMIx server direct modex requests",
128130
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
@@ -158,7 +160,7 @@ static void eviction_cbfunc(struct opal_hotel_t *hotel,
158160
{
159161
pmix_server_req_t *req = (pmix_server_req_t*)occupant;
160162
bool timeout = false;
161-
int rc;
163+
int rc=OPAL_ERR_TIMEOUT;
162164

163165
/* decrement the request timeout */
164166
req->timeout -= orte_pmix_server_globals.timeout;
@@ -175,6 +177,8 @@ static void eviction_cbfunc(struct opal_hotel_t *hotel,
175177
}
176178
ORTE_ERROR_LOG(rc);
177179
/* fall thru and return an error so the caller doesn't hang */
180+
} else {
181+
orte_show_help("help-orted.txt", "timedout", true, req->operation);
178182
}
179183
/* don't let the caller hang */
180184
if (NULL != req->opcbfunc) {
@@ -205,6 +209,17 @@ int pmix_server_init(void)
205209

206210
/* setup the server's state variables */
207211
OBJ_CONSTRUCT(&orte_pmix_server_globals.reqs, opal_hotel_t);
212+
/* by the time we init the server, we should know how many nodes we
213+
* have in our environment - with the exception of mpirun. If the
214+
* user specified the size of the hotel, then use that value. Otherwise,
215+
* set the value to something large to avoid running out of rooms on
216+
* large machines */
217+
if (-1 == orte_pmix_server_globals.num_rooms) {
218+
orte_pmix_server_globals.num_rooms = orte_process_info.num_procs * 2;
219+
if (orte_pmix_server_globals.num_rooms < ORTE_PMIX_SERVER_MIN_ROOMS) {
220+
orte_pmix_server_globals.num_rooms = ORTE_PMIX_SERVER_MIN_ROOMS;
221+
}
222+
}
208223
if (OPAL_SUCCESS != (rc = opal_hotel_init(&orte_pmix_server_globals.reqs,
209224
orte_pmix_server_globals.num_rooms,
210225
orte_event_base, orte_pmix_server_globals.timeout*1000000,
@@ -533,13 +548,15 @@ static void pmix_server_dmdx_recv(int status, orte_process_name_t* sender,
533548
* condition, so just log the request and we will fill
534549
* it later */
535550
req = OBJ_NEW(pmix_server_req_t);
551+
(void)asprintf(&req->operation, "DMDX: %s:%d", __FILE__, __LINE__);
536552
req->proxy = *sender;
537553
req->target = idreq;
538554
req->remote_room_num = room_num;
539555
/* adjust the timeout to reflect the size of the job as it can take some
540556
* amount of time to start the job */
541557
ORTE_ADJUST_TIMEOUT(req);
542558
if (OPAL_SUCCESS != (rc = opal_hotel_checkin(&orte_pmix_server_globals.reqs, req, &req->room_num))) {
559+
ORTE_ERROR_LOG(rc);
543560
OBJ_RELEASE(req);
544561
send_error(rc, &idreq, sender);
545562
}
@@ -558,13 +575,15 @@ static void pmix_server_dmdx_recv(int status, orte_process_name_t* sender,
558575
/* track the request since the call down to the PMIx server
559576
* is asynchronous */
560577
req = OBJ_NEW(pmix_server_req_t);
578+
(void)asprintf(&req->operation, "DMDX: %s:%d", __FILE__, __LINE__);
561579
req->proxy = *sender;
562580
req->target = idreq;
563581
req->remote_room_num = room_num;
564582
/* adjust the timeout to reflect the size of the job as it can take some
565583
* amount of time to start the job */
566584
ORTE_ADJUST_TIMEOUT(req);
567585
if (OPAL_SUCCESS != (rc = opal_hotel_checkin(&orte_pmix_server_globals.reqs, req, &req->room_num))) {
586+
ORTE_ERROR_LOG(rc);
568587
OBJ_RELEASE(req);
569588
send_error(rc, &idreq, sender);
570589
return;
@@ -696,6 +715,7 @@ OBJ_CLASS_INSTANCE(orte_pmix_server_op_caddy_t,
696715

697716
static void rqcon(pmix_server_req_t *p)
698717
{
718+
p->operation = NULL;
699719
p->target = *ORTE_NAME_INVALID;
700720
p->proxy = *ORTE_NAME_INVALID;
701721
p->timeout = orte_pmix_server_globals.timeout;
@@ -710,6 +730,9 @@ static void rqcon(pmix_server_req_t *p)
710730
}
711731
static void rqdes(pmix_server_req_t *p)
712732
{
733+
if (NULL != p->operation) {
734+
free(p->operation);
735+
}
713736
if (NULL != p->jdata) {
714737
OBJ_RELEASE(p->jdata);
715738
}

orte/orted/pmix/pmix_server_internal.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@
6262
typedef struct {
6363
opal_object_t super;
6464
opal_event_t ev;
65+
char *operation;
6566
int status;
6667
int timeout;
6768
int room_num;
@@ -109,6 +110,7 @@ OBJ_CLASS_DECLARATION(orte_pmix_mdx_caddy_t);
109110
do { \
110111
pmix_server_req_t *_req; \
111112
_req = OBJ_NEW(pmix_server_req_t); \
113+
(void)asprintf(&_req->operation, "DMDX: %s:%d", __FILE__, __LINE__); \
112114
_req->target = (p); \
113115
_req->mdxcbfunc = (ocf); \
114116
_req->cbdata = (ocd); \
@@ -122,6 +124,7 @@ OBJ_CLASS_DECLARATION(orte_pmix_mdx_caddy_t);
122124
do { \
123125
pmix_server_req_t *_req; \
124126
_req = OBJ_NEW(pmix_server_req_t); \
127+
(void)asprintf(&_req->operation, "SPAWN: %s:%d", __FILE__, __LINE__); \
125128
_req->jdata = (j); \
126129
_req->spcbfunc = (ocf); \
127130
_req->cbdata = (ocd); \

orte/orted/pmix/pmix_server_pub.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
* All rights reserved.
1414
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
1515
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
16-
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
16+
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
1717
* Copyright (c) 2014 Mellanox Technologies, Inc.
1818
* All rights reserved.
1919
* Copyright (c) 2014-2016 Research Organization for Information Science
@@ -100,6 +100,7 @@ int pmix_server_publish_fn(opal_process_name_t *proc,
100100

101101
/* create the caddy */
102102
req = OBJ_NEW(pmix_server_req_t);
103+
(void)asprintf(&req->operation, "PUBLISH: %s:%d", __FILE__, __LINE__);
103104
req->opcbfunc = cbfunc;
104105
req->cbdata = cbdata;
105106

@@ -207,6 +208,7 @@ int pmix_server_lookup_fn(opal_process_name_t *proc, char **keys,
207208

208209
/* create the caddy */
209210
req = OBJ_NEW(pmix_server_req_t);
211+
(void)asprintf(&req->operation, "LOOKUP: %s:%d", __FILE__, __LINE__);
210212
req->lkcbfunc = cbfunc;
211213
req->cbdata = cbdata;
212214

@@ -302,6 +304,7 @@ int pmix_server_unpublish_fn(opal_process_name_t *proc, char **keys,
302304

303305
/* create the caddy */
304306
req = OBJ_NEW(pmix_server_req_t);
307+
(void)asprintf(&req->operation, "UNPUBLISH: %s:%d", __FILE__, __LINE__);
305308
req->opcbfunc = cbfunc;
306309
req->cbdata = cbdata;
307310

@@ -468,4 +471,3 @@ void pmix_server_keyval_client(int status, orte_process_name_t* sender,
468471
OBJ_RELEASE(req);
469472
}
470473
}
471-

0 commit comments

Comments
 (0)