Skip to content

Commit 710912e

Browse files
jjhurseygpaulsen
authored andcommitted
orte: Add option for mixing of short and long hostnames
* Addresses Epic 103619 * New MCA parameter orte_use_mixed_hostnames (default: 'false') * In some scenarios users expect the short hostname 'node01' to match the long hostname 'node01.mycluster.org'. There are cases in the runtime where the difference causes accounting errors (best case) or failure to launch (worst case). - This parameter causes us to strip off the FQDN from the hostname, leaving only the short name for comparison purposes. - This is similar to the orte_keep_fqdn_hostnames MCA parameter, except that it works as its opposite. We did not want to interpret the fqdn_hostnames parameter being set to 'false' (default) as 'use only shortnames', since it is currently being interpreted differently for clusters where there might be significance in the short versus long hostnames. (cherry picked from commit 48f3a00a645cb48653a79f2a5375763ce7b9e92e) Conflicts: orte/mca/ras/base/ras_base_allocate.c
1 parent a5c91da commit 710912e

File tree

9 files changed

+90
-30
lines changed

9 files changed

+90
-30
lines changed

orte/mca/ras/base/ras_base_allocate.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
1515
* Copyright (c) 2018 Research Organization for Information Science
1616
* and Technology (RIST). All rights reserved.
17+
* Copyright (c) 2016 IBM Corporation. All rights reserved.
1718
* $COPYRIGHT$
1819
*
1920
* Additional copyrights may follow

orte/mca/rmaps/rank_file/rmaps_rank_file.c

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,9 @@
3434
#ifdef HAVE_UNISTD_H
3535
#include <unistd.h>
3636
#endif /* HAVE_UNISTD_H */
37+
#if HAVE_ARPA_INET_H
38+
#include <arpa/inet.h>
39+
#endif
3740
#include <string.h>
3841

3942
#include "opal/util/argv.h"
@@ -501,11 +504,16 @@ static int orte_rmaps_rank_file_parse(const char *rankfile)
501504
}
502505
opal_argv_free (argv);
503506

504-
// Strip off the FQDN if present, ignore IP addresses
505-
if( !orte_keep_fqdn_hostnames && !opal_net_isaddr(node_name) ) {
507+
// Strip off the FQDN if present
508+
if( orte_use_mixed_hostnames ) {
506509
char *ptr;
507-
if (NULL != (ptr = strchr(node_name, '.'))) {
508-
*ptr = '\0';
510+
struct in_addr buf;
511+
/* if the nodename is an IP address, do not mess with it! */
512+
if (0 == inet_pton(AF_INET, node_name, &buf) &&
513+
0 == inet_pton(AF_INET6, node_name, &buf)) {
514+
if (NULL != (ptr = strchr(node_name, '.'))) {
515+
*ptr = '\0';
516+
}
509517
}
510518
}
511519

orte/mca/rmaps/seq/rmaps_seq.c

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -192,11 +192,17 @@ static int orte_rmaps_seq_map(orte_job_t *jdata)
192192
sq->cpuset = strdup(sep);
193193
}
194194

195-
// Strip off the FQDN if present, ignore IP addresses
196-
if( !orte_keep_fqdn_hostnames && !opal_net_isaddr(hstname) ) {
195+
// Strip off the FQDN if present
196+
if( orte_use_mixed_hostnames ) {
197197
char *ptr;
198-
if (NULL != (ptr = strchr(hstname, '.'))) {
199-
*ptr = '\0';
198+
struct in_addr buf;
199+
200+
/* if the nodename is an IP address, do not mess with it! */
201+
if (0 == inet_pton(AF_INET, hstname, &buf) &&
202+
0 == inet_pton(AF_INET6, hstname, &buf)) {
203+
if (NULL != (ptr = strchr(hstname, '.'))) {
204+
*ptr = '\0';
205+
}
200206
}
201207
}
202208

@@ -300,11 +306,16 @@ static int orte_rmaps_seq_map(orte_job_t *jdata)
300306
sq->cpuset = strdup(sep);
301307
}
302308

303-
// Strip off the FQDN if present, ignore IP addresses
304-
if( !orte_keep_fqdn_hostnames && !opal_net_isaddr(hstname) ) {
309+
// Strip off the FQDN if present
310+
if( orte_use_mixed_hostnames ) {
305311
char *ptr;
306-
if (NULL != (ptr = strchr(hstname, '.'))) {
307-
(*ptr) = '\0';
312+
struct in_addr buf;
313+
/* if the nodename is an IP address, do not mess with it! */
314+
if (0 == inet_pton(AF_INET, hstname, &buf) &&
315+
0 == inet_pton(AF_INET6, hstname, &buf)) {
316+
if (NULL != (ptr = strchr(hstname, '.'))) {
317+
(*ptr) = '\0'; // JJH RETURN HERE TO DETERMINE WHY SEGV...
318+
}
308319
}
309320
}
310321

orte/runtime/orte_globals.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ char *orte_oob_static_ports = NULL;
8585
bool orte_standalone_operation = false;
8686
bool orte_fwd_mpirun_port = true;
8787

88+
bool orte_use_mixed_hostnames = false;
8889
bool orte_keep_fqdn_hostnames = false;
8990
bool orte_have_fqdn_allocation = false;
9091
bool orte_show_resolved_nodenames = false;

orte/runtime/orte_globals.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -470,6 +470,7 @@ ORTE_DECLSPEC extern bool orte_standalone_operation;
470470
ORTE_DECLSPEC extern bool orte_fwd_mpirun_port;
471471

472472
/* nodename flags */
473+
ORTE_DECLSPEC extern bool orte_use_mixed_hostnames;
473474
ORTE_DECLSPEC extern bool orte_keep_fqdn_hostnames;
474475
ORTE_DECLSPEC extern bool orte_have_fqdn_allocation;
475476
ORTE_DECLSPEC extern bool orte_show_resolved_nodenames;

orte/runtime/orte_mca_params.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -422,6 +422,14 @@ int orte_register_params(void)
422422
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
423423
&orte_keep_fqdn_hostnames);
424424

425+
/* whether or not to match short hostnames to FQDN hostnames */
426+
orte_use_mixed_hostnames = false;
427+
(void) mca_base_var_register ("orte", "orte", NULL, "use_mixed_hostnames",
428+
"Whether or not to use mixed hostnames (short == FQDN) [default: no]",
429+
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
430+
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
431+
&orte_use_mixed_hostnames);
432+
425433
/* whether or not to retain aliases of hostnames */
426434
orte_retain_aliases = false;
427435
(void) mca_base_var_register ("orte", "orte", NULL, "retain_aliases",

orte/util/dash_host/dash_host.c

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,9 @@
2525

2626
#include <string.h>
2727
#include <ctype.h>
28+
#if HAVE_ARPA_INET_H
29+
#include <arpa/inet.h>
30+
#endif
2831

2932
#include "orte/constants.h"
3033
#include "orte/types.h"
@@ -212,10 +215,17 @@ int orte_util_add_dash_host_nodes(opal_list_t *nodes,
212215
ndname = mini_map[i];
213216
}
214217

215-
// Strip off the FQDN if present, ignore IP addresses
216-
if( !orte_keep_fqdn_hostnames && !opal_net_isaddr(ndname) ) {
217-
if (NULL != (ptr = strchr(ndname, '.'))) {
218-
*ptr = '\0';
218+
219+
// Strip off the FQDN if present
220+
if( orte_use_mixed_hostnames ) {
221+
char *ptr;
222+
struct in_addr buf;
223+
/* if the nodename is an IP address, do not mess with it! */
224+
if (0 == inet_pton(AF_INET, ndname, &buf) &&
225+
0 == inet_pton(AF_INET6, ndname, &buf)) {
226+
if (NULL != (ptr = strchr(ndname, '.'))) {
227+
*ptr = '\0';
228+
}
219229
}
220230
}
221231
/* remove any modifier */

orte/util/hostfile/hostfile.c

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,9 @@
2828
#ifdef HAVE_UNISTD_H
2929
#include <unistd.h>
3030
#endif
31+
#if HAVE_ARPA_INET_H
32+
#include <arpa/inet.h>
33+
#endif
3134
#include <errno.h>
3235
#include <string.h>
3336
#include <sys/stat.h>
@@ -166,11 +169,16 @@ static int hostfile_parse_line(int token, opal_list_t* updates,
166169
}
167170
opal_argv_free (argv);
168171

169-
// Strip off the FQDN if present, ignore IP addresses
170-
if( !orte_keep_fqdn_hostnames && !opal_net_isaddr(node_name) ) {
172+
// Strip off the FQDN if present
173+
if( orte_use_mixed_hostnames ) {
171174
char *ptr;
172-
if (NULL != (ptr = strchr(node_name, '.'))) {
173-
*ptr = '\0';
175+
struct in_addr buf;
176+
/* if the nodename is an IP address, do not mess with it! */
177+
if (0 == inet_pton(AF_INET, node_name, &buf) &&
178+
0 == inet_pton(AF_INET6, node_name, &buf)) {
179+
if (NULL != (ptr = strchr(node_name, '.'))) {
180+
*ptr = '\0';
181+
}
174182
}
175183
}
176184

@@ -281,11 +289,16 @@ static int hostfile_parse_line(int token, opal_list_t* updates,
281289
}
282290
opal_argv_free (argv);
283291

284-
// Strip off the FQDN if present, ignore IP addresses
285-
if( !orte_keep_fqdn_hostnames && !opal_net_isaddr(node_name) ) {
292+
// Strip off the FQDN if present
293+
if( orte_use_mixed_hostnames ) {
286294
char *ptr;
287-
if (NULL != (ptr = strchr(node_name, '.'))) {
288-
*ptr = '\0';
295+
struct in_addr buf;
296+
/* if the nodename is an IP address, do not mess with it! */
297+
if (0 == inet_pton(AF_INET, node_name, &buf) &&
298+
0 == inet_pton(AF_INET6, node_name, &buf)) {
299+
if (NULL != (ptr = strchr(node_name, '.'))) {
300+
*ptr = '\0';
301+
}
289302
}
290303
}
291304

orte/util/proc_info.c

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949

5050
/* provide a connection to a reqd variable */
5151
extern bool orte_keep_fqdn_hostnames;
52+
extern bool orte_use_mixed_hostnames;
5253

5354
#define ORTE_NAME_INVALID {ORTE_JOBID_INVALID, ORTE_VPID_INVALID}
5455

@@ -169,12 +170,18 @@ int orte_proc_info(void)
169170
/* add this to our list of aliases */
170171
opal_argv_append_nosize(&orte_process_info.aliases, hostname);
171172

172-
// Strip off the FQDN if present, ignore IP addresses
173-
if( !orte_keep_fqdn_hostnames && !opal_net_isaddr(hostname) ) {
174-
if (NULL != (ptr = strchr(hostname, '.'))) {
175-
*ptr = '\0';
176-
/* add this to our list of aliases */
177-
opal_argv_append_nosize(&orte_process_info.aliases, hostname);
173+
// These two options should be mutually exclusive
174+
// Cannot specify orte_use_mixed_hostnames=true and orte_keep_fqdn_hostnames=true
175+
if (!orte_keep_fqdn_hostnames || orte_use_mixed_hostnames) {
176+
/* if the nodename is an IP address, do not mess with it! */
177+
if (0 == inet_pton(AF_INET, hostname, &buf) &&
178+
0 == inet_pton(AF_INET6, hostname, &buf)) {
179+
/* not an IP address, so remove any domain info */
180+
if (NULL != (ptr = strchr(hostname, '.'))) {
181+
*ptr = '\0';
182+
/* add this to our list of aliases */
183+
opal_argv_append_nosize(&orte_process_info.aliases, hostname);
184+
}
178185
}
179186
}
180187

0 commit comments

Comments
 (0)