Skip to content

Commit 5b8a40a

Browse files
author
rhc54
committed
Merge pull request #1528 from hpcraink/pr/osx_sun_path
OSX tempdir too long for sun_path
2 parents 896f857 + ad690a4 commit 5b8a40a

File tree

6 files changed

+30
-9
lines changed

6 files changed

+30
-9
lines changed

opal/mca/pmix/pmix120/pmix/src/server/pmix_server.c

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,7 @@ static pmix_status_t initialize_server_base(pmix_server_module_t *module)
143143
{
144144
int debug_level;
145145
char *tdir, *evar;
146+
char * pmix_pid;
146147
pid_t pid;
147148

148149
/* initialize the output system */
@@ -219,7 +220,14 @@ static pmix_status_t initialize_server_base(pmix_server_module_t *module)
219220
/* now set the address - we use the pid here to reduce collisions */
220221
memset(&myaddress, 0, sizeof(struct sockaddr_un));
221222
myaddress.sun_family = AF_UNIX;
222-
snprintf(myaddress.sun_path, sizeof(myaddress.sun_path)-1, "%s/pmix-%d", tdir, pid);
223+
asprintf(&pmix_pid, "pmix-%d", pid);
224+
// If the above set temporary directory name plus the pmix-PID string
225+
// plus the '/' separator are too long, just fail, so the caller
226+
// may provide the user with a proper help... *Cough*, *Cough* OSX...
227+
if ((strlen(tdir) + strlen(pmix_pid) + 1) > sizeof(myaddress.sun_path)-1) {
228+
return PMIX_ERR_INVALID_LENGTH;
229+
}
230+
snprintf(myaddress.sun_path, sizeof(myaddress.sun_path)-1, "%s/%s", tdir, pmix_pid);
223231
asprintf(&myuri, "%s:%lu:%s", pmix_globals.myid.nspace, (unsigned long)pmix_globals.myid.rank, myaddress.sun_path);
224232

225233

opal/mca/pmix/pmix120/pmix/src/server/pmix_server_listener.c

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -72,19 +72,20 @@ pmix_status_t pmix_start_listening(struct sockaddr_un *address)
7272
{
7373
int flags;
7474
pmix_status_t rc;
75-
unsigned int addrlen;
75+
socklen_t addrlen;
7676
char *ptr;
7777

7878
/* create a listen socket for incoming connection attempts */
7979
pmix_server_globals.listen_socket = socket(PF_UNIX, SOCK_STREAM, 0);
8080
if (pmix_server_globals.listen_socket < 0) {
81-
printf("%s:%d socket() failed", __FILE__, __LINE__);
81+
printf("%s:%d socket() failed\n", __FILE__, __LINE__);
8282
return PMIX_ERROR;
8383
}
8484

8585
addrlen = sizeof(struct sockaddr_un);
8686
if (bind(pmix_server_globals.listen_socket, (struct sockaddr*)address, addrlen) < 0) {
87-
printf("%s:%d bind() failed", __FILE__, __LINE__);
87+
printf("%s:%d bind() failed error:%s\n", __FILE__, __LINE__,
88+
strerror(errno));
8889
return PMIX_ERROR;
8990
}
9091
/* set the mode as required */
@@ -95,18 +96,18 @@ pmix_status_t pmix_start_listening(struct sockaddr_un *address)
9596

9697
/* setup listen backlog to maximum allowed by kernel */
9798
if (listen(pmix_server_globals.listen_socket, SOMAXCONN) < 0) {
98-
printf("%s:%d listen() failed", __FILE__, __LINE__);
99+
printf("%s:%d listen() failed\n", __FILE__, __LINE__);
99100
return PMIX_ERROR;
100101
}
101102

102103
/* set socket up to be non-blocking, otherwise accept could block */
103104
if ((flags = fcntl(pmix_server_globals.listen_socket, F_GETFL, 0)) < 0) {
104-
printf("%s:%d fcntl(F_GETFL) failed", __FILE__, __LINE__);
105+
printf("%s:%d fcntl(F_GETFL) failed\n", __FILE__, __LINE__);
105106
return PMIX_ERROR;
106107
}
107108
flags |= O_NONBLOCK;
108109
if (fcntl(pmix_server_globals.listen_socket, F_SETFL, flags) < 0) {
109-
printf("%s:%d fcntl(F_SETFL) failed", __FILE__, __LINE__);
110+
printf("%s:%d fcntl(F_SETFL) failed\n", __FILE__, __LINE__);
110111
return PMIX_ERROR;
111112
}
112113

orte/mca/ess/base/ess_base_std_orted.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -515,7 +515,7 @@ int orte_ess_base_orted_setup(char **hosts)
515515
/* setup the PMIx server */
516516
if (ORTE_SUCCESS != (ret = pmix_server_init())) {
517517
ORTE_ERROR_LOG(ret);
518-
error = "pmix server init";
518+
error = "Try a shorter TMPDIR var. or change your computer's name (see uname -n), since pmix_server_init";
519519
goto error;
520520
}
521521

orte/mca/ess/hnp/ess_hnp_module.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -634,7 +634,7 @@ static int rte_init(void)
634634
/* setup the PMIx server */
635635
if (ORTE_SUCCESS != (ret = pmix_server_init())) {
636636
ORTE_ERROR_LOG(ret);
637-
error = "pmix server init";
637+
error = "Try a shorter TMPDIR var. or change your computer's name (see uname -n), since pmix_server_init";
638638
goto error;
639639
}
640640

orte/orted/pmix/pmix_server.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,9 @@ int pmix_server_init(void)
246246
if (ORTE_SUCCESS != (rc = opal_pmix.server_init(&pmix_server, &info))) {
247247
ORTE_ERROR_LOG(rc);
248248
/* memory cleanup will occur when finalize is called */
249+
orte_show_help("help-orterun.txt", "orterun:pmix-failed", true,
250+
orte_process_info.proc_session_dir);
251+
return rc;
249252
}
250253
OPAL_LIST_DESTRUCT(&info);
251254

orte/tools/orterun/help-orterun.txt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -660,3 +660,12 @@ method and try launching your job again.
660660

661661
Your job will now abort.
662662
#
663+
[orterun:pmix-failed]
664+
The call to pmix_server_init() failed. This may be due to your
665+
system's limit on the path length of Unix domain sockets.
666+
667+
orte_proc_session_dir: %s
668+
669+
Please try to set TMPDIR to something short (like /tmp) or change
670+
your computer's name (see uname -n).
671+
#

0 commit comments

Comments
 (0)