Skip to content

Commit eadff28

Browse files
committed
orte: Add option for mixing of short and long hostnames
* New MCA parameter orte_use_mixed_hostnames (default: 'false') * In some scenarios users expect the short hostname 'node01' to match the long hostname 'node01.mycluster.org'. There are cases in the runtime where the difference causes accounting errors (best case) or failure to launch (worst case). - This parameter causes us to strip off the FQDN from the hostname, leaving only the short name for comparison purposes. - This is similar to the orte_keep_fqdn_hostnames MCA parameter, except that it works as its opposite. We did not want to interpret the fqdn_hostnames parameter being set to 'false' (default) as 'use only shortnames', since it is currently being interpreted differently for clusters where there might be significance in the short versus long hostnames.
1 parent 19b0f4d commit eadff28

File tree

10 files changed

+137
-5
lines changed

10 files changed

+137
-5
lines changed

orte/mca/plm/base/plm_base_launch_support.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
1717
* Copyright (c) 2014-2015 Research Organization for Information Science
1818
* and Technology (RIST). All rights reserved.
19+
* Copyright (c) 2016 IBM Corporation. All rights reserved.
1920
* $COPYRIGHT$
2021
*
2122
* Additional copyrights may follow
@@ -1528,7 +1529,7 @@ int orte_plm_base_setup_virtual_machine(orte_job_t *jdata)
15281529
bool one_filter = false;
15291530
int num_nodes;
15301531
bool default_hostfile_used;
1531-
char *hosts;
1532+
char *hosts = NULL;
15321533
bool singleton=false;
15331534
bool multi_sim = false;
15341535

orte/mca/rmaps/base/rmaps_base_support_fns.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
* Copyright (c) 2011-2012 Los Alamos National Security, LLC.
1414
* All rights reserved.
1515
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
16+
* Copyright (c) 2016 IBM Corporation. All rights reserved.
1617
* $COPYRIGHT$
1718
*
1819
* Additional copyrights may follow
@@ -147,7 +148,7 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr
147148
orte_job_t *daemons;
148149
bool novm;
149150
opal_list_t nodes;
150-
char *hosts;
151+
char *hosts = NULL;
151152

152153
/** set default answer */
153154
*total_num_slots = 0;

orte/mca/rmaps/rank_file/rmaps_rank_file.c

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
1818
* Copyright (c) 2015 Research Organization for Information Science
1919
* and Technology (RIST). All rights reserved.
20+
* Copyright (c) 2016 IBM Corporation. All rights reserved.
2021
*
2122
* $COPYRIGHT$
2223
*
@@ -33,6 +34,9 @@
3334
#ifdef HAVE_UNISTD_H
3435
#include <unistd.h>
3536
#endif /* HAVE_UNISTD_H */
37+
#if HAVE_ARPA_INET_H
38+
#include <arpa/inet.h>
39+
#endif
3640
#include <string.h>
3741

3842
#include "opal/util/argv.h"
@@ -488,6 +492,20 @@ static int orte_rmaps_rank_file_parse(const char *rankfile)
488492
goto unlock;
489493
}
490494
opal_argv_free (argv);
495+
496+
// Strip off the FQDN if present
497+
if( orte_use_mixed_hostnames ) {
498+
char *ptr;
499+
struct in_addr buf;
500+
/* if the nodename is an IP address, do not mess with it! */
501+
if (0 == inet_pton(AF_INET, node_name, &buf) &&
502+
0 == inet_pton(AF_INET6, node_name, &buf)) {
503+
if (NULL != (ptr = strchr(node_name, '.'))) {
504+
*ptr = '\0';
505+
}
506+
}
507+
}
508+
491509
/* check the rank item */
492510
if (NULL == rfmap) {
493511
orte_show_help("help-rmaps_rank_file.txt", "bad-syntax", true, rankfile);

orte/mca/rmaps/seq/rmaps_seq.c

Lines changed: 49 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
1616
* Copyright (c) 2015 Research Organization for Information Science
1717
* and Technology (RIST). All rights reserved.
18+
* Copyright (c) 2016 IBM Corporation. All rights reserved.
1819
* $COPYRIGHT$
1920
*
2021
* Additional copyrights may follow
@@ -70,9 +71,11 @@ static void sn_des(seq_node_t *p)
7071
{
7172
if (NULL != p->hostname) {
7273
free(p->hostname);
74+
p->hostname = NULL;
7375
}
7476
if (NULL != p->cpuset) {
7577
free(p->cpuset);
78+
p->cpuset = NULL;
7679
}
7780
}
7881
OBJ_CLASS_INSTANCE(seq_node_t,
@@ -101,7 +104,7 @@ static int orte_rmaps_seq_map(orte_job_t *jdata)
101104
opal_list_t node_list, *seq_list, sq_list;
102105
orte_proc_t *proc;
103106
mca_base_component_t *c = &mca_rmaps_seq_component.base_version;
104-
char *hosts, *sep, *eptr;
107+
char *hosts = NULL, *sep, *eptr;
105108
FILE *fp;
106109
opal_hwloc_resource_type_t rtype;
107110

@@ -156,7 +159,7 @@ static int orte_rmaps_seq_map(orte_job_t *jdata)
156159
/* if there is a default hostfile, go and get its ordered list of nodes */
157160
OBJ_CONSTRUCT(&default_seq_list, opal_list_t);
158161
if (NULL != orte_default_hostfile) {
159-
char *hstname;
162+
char *hstname = NULL;
160163
/* open the file */
161164
fp = fopen(orte_default_hostfile, "r");
162165
if (NULL == fp) {
@@ -170,6 +173,11 @@ static int orte_rmaps_seq_map(orte_job_t *jdata)
170173
/* blank line - ignore */
171174
continue;
172175
}
176+
if( '#' == hstname[0] ) {
177+
free(hstname);
178+
/* Comment line - ignore */
179+
continue;
180+
}
173181
sq = OBJ_NEW(seq_node_t);
174182
if (NULL != (sep = strchr(hstname, ' '))) {
175183
*sep = '\0';
@@ -182,6 +190,21 @@ static int orte_rmaps_seq_map(orte_job_t *jdata)
182190
*(eptr+1) = 0;
183191
sq->cpuset = strdup(sep);
184192
}
193+
194+
// Strip off the FQDN if present
195+
if( orte_use_mixed_hostnames ) {
196+
char *ptr;
197+
struct in_addr buf;
198+
199+
/* if the nodename is an IP address, do not mess with it! */
200+
if (0 == inet_pton(AF_INET, hstname, &buf) &&
201+
0 == inet_pton(AF_INET6, hstname, &buf)) {
202+
if (NULL != (ptr = strchr(hstname, '.'))) {
203+
*ptr = '\0';
204+
}
205+
}
206+
}
207+
185208
sq->hostname = hstname;
186209
opal_list_append(&default_seq_list, &sq->super);
187210
}
@@ -255,6 +278,16 @@ static int orte_rmaps_seq_map(orte_job_t *jdata)
255278
goto error;
256279
}
257280
while (NULL != (hstname = orte_getline(fp))) {
281+
if (0 == strlen(hstname)) {
282+
free(hstname);
283+
/* blank line - ignore */
284+
continue;
285+
}
286+
if( '#' == hstname[0] ) {
287+
free(hstname);
288+
/* Comment line - ignore */
289+
continue;
290+
}
258291
sq = OBJ_NEW(seq_node_t);
259292
if (NULL != (sep = strchr(hstname, ' '))) {
260293
*sep = '\0';
@@ -267,6 +300,20 @@ static int orte_rmaps_seq_map(orte_job_t *jdata)
267300
*(eptr+1) = 0;
268301
sq->cpuset = strdup(sep);
269302
}
303+
304+
// Strip off the FQDN if present
305+
if( orte_use_mixed_hostnames ) {
306+
char *ptr;
307+
struct in_addr buf;
308+
/* if the nodename is an IP address, do not mess with it! */
309+
if (0 == inet_pton(AF_INET, hstname, &buf) &&
310+
0 == inet_pton(AF_INET6, hstname, &buf)) {
311+
if (NULL != (ptr = strchr(hstname, '.'))) {
312+
(*ptr) = '\0'; // JJH RETURN HERE TO DETERMINE WHY SEGV...
313+
}
314+
}
315+
}
316+
270317
sq->hostname = hstname;
271318
opal_list_append(&sq_list, &sq->super);
272319
}

orte/runtime/orte_globals.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved
1717
* Copyright (c) 2014-2015 Research Organization for Information Science
1818
* and Technology (RIST). All rights reserved.
19+
* Copyright (c) 2016 IBM Corporation. All rights reserved.
1920
* $COPYRIGHT$
2021
*
2122
* Additional copyrights may follow
@@ -77,6 +78,7 @@ bool orte_static_ports = false;
7778
char *orte_oob_static_ports = NULL;
7879
bool orte_standalone_operation = false;
7980

81+
bool orte_use_mixed_hostnames = false;
8082
bool orte_keep_fqdn_hostnames = false;
8183
bool orte_have_fqdn_allocation = false;
8284
bool orte_show_resolved_nodenames = false;

orte/runtime/orte_globals.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
* Copyright (c) 2011-2013 Los Alamos National Security, LLC.
1515
* All rights reserved.
1616
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved
17+
* Copyright (c) 2016 IBM Corporation. All rights reserved.
1718
* $COPYRIGHT$
1819
*
1920
* Additional copyrights may follow
@@ -460,6 +461,7 @@ ORTE_DECLSPEC extern char *orte_oob_static_ports;
460461
ORTE_DECLSPEC extern bool orte_standalone_operation;
461462

462463
/* nodename flags */
464+
ORTE_DECLSPEC extern bool orte_use_mixed_hostnames;
463465
ORTE_DECLSPEC extern bool orte_keep_fqdn_hostnames;
464466
ORTE_DECLSPEC extern bool orte_have_fqdn_allocation;
465467
ORTE_DECLSPEC extern bool orte_show_resolved_nodenames;

orte/runtime/orte_mca_params.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved
1717
* Copyright (c) 2014 Research Organization for Information Science
1818
* and Technology (RIST). All rights reserved.
19+
* Copyright (c) 2016 IBM Corporation. All rights reserved.
1920
* $COPYRIGHT$
2021
*
2122
* Additional copyrights may follow
@@ -421,6 +422,14 @@ int orte_register_params(void)
421422
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
422423
&orte_keep_fqdn_hostnames);
423424

425+
/* whether or not to match short hostnames to FQDN hostnames */
426+
orte_use_mixed_hostnames = false;
427+
(void) mca_base_var_register ("orte", "orte", NULL, "use_mixed_hostnames",
428+
"Whether or not to use mixed hostnames (short == FQDN) [default: no]",
429+
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
430+
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
431+
&orte_use_mixed_hostnames);
432+
424433
/* whether or not to retain aliases of hostnames */
425434
orte_retain_aliases = false;
426435
(void) mca_base_var_register ("orte", "orte", NULL, "retain_aliases",

orte/util/dash_host/dash_host.c

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
1414
* Copyright (c) 2015 Research Organization for Information Science
1515
* and Technology (RIST). All rights reserved.
16+
* Copyright (c) 2016 IBM Corporation. All rights reserved.
1617
* $COPYRIGHT$
1718
*
1819
* Additional copyrights may follow
@@ -23,6 +24,9 @@
2324
#include "orte_config.h"
2425

2526
#include <string.h>
27+
#if HAVE_ARPA_INET_H
28+
#include <arpa/inet.h>
29+
#endif
2630

2731
#include "orte/constants.h"
2832
#include "orte/types.h"
@@ -207,6 +211,19 @@ int orte_util_add_dash_host_nodes(opal_list_t *nodes,
207211
ndname = mini_map[i];
208212
}
209213

214+
// Strip off the FQDN if present
215+
if( orte_use_mixed_hostnames ) {
216+
char *ptr;
217+
struct in_addr buf;
218+
/* if the nodename is an IP address, do not mess with it! */
219+
if (0 == inet_pton(AF_INET, ndname, &buf) &&
220+
0 == inet_pton(AF_INET6, ndname, &buf)) {
221+
if (NULL != (ptr = strchr(ndname, '.'))) {
222+
*ptr = '\0';
223+
}
224+
}
225+
}
226+
210227
/* see if the node is already on the list */
211228
found = false;
212229
OPAL_LIST_FOREACH(node, &adds, orte_node_t) {

orte/util/hostfile/hostfile.c

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved.
1616
* Copyright (c) 2015 Research Organization for Information Science
1717
* and Technology (RIST). All rights reserved.
18+
* Copyright (c) 2016 IBM Corporation. All rights reserved.
1819
* $COPYRIGHT$
1920
*
2021
* Additional copyrights may follow
@@ -27,6 +28,9 @@
2728
#ifdef HAVE_UNISTD_H
2829
#include <unistd.h>
2930
#endif
31+
#if HAVE_ARPA_INET_H
32+
#include <arpa/inet.h>
33+
#endif
3034
#include <errno.h>
3135
#include <string.h>
3236
#include <sys/stat.h>
@@ -164,6 +168,19 @@ static int hostfile_parse_line(int token, opal_list_t* updates,
164168
}
165169
opal_argv_free (argv);
166170

171+
// Strip off the FQDN if present
172+
if( orte_use_mixed_hostnames ) {
173+
char *ptr;
174+
struct in_addr buf;
175+
/* if the nodename is an IP address, do not mess with it! */
176+
if (0 == inet_pton(AF_INET, node_name, &buf) &&
177+
0 == inet_pton(AF_INET6, node_name, &buf)) {
178+
if (NULL != (ptr = strchr(node_name, '.'))) {
179+
*ptr = '\0';
180+
}
181+
}
182+
}
183+
167184
/* if the first letter of the name is '^', then this is a node
168185
* to be excluded. Remove the ^ character so the nodename is
169186
* usable, and put it on the exclude list
@@ -270,6 +287,20 @@ static int hostfile_parse_line(int token, opal_list_t* updates,
270287
opal_output(0, "WARNING: Unhandled user@host-combination\n"); /* XXX */
271288
}
272289
opal_argv_free (argv);
290+
291+
// Strip off the FQDN if present
292+
if( orte_use_mixed_hostnames ) {
293+
char *ptr;
294+
struct in_addr buf;
295+
/* if the nodename is an IP address, do not mess with it! */
296+
if (0 == inet_pton(AF_INET, node_name, &buf) &&
297+
0 == inet_pton(AF_INET6, node_name, &buf)) {
298+
if (NULL != (ptr = strchr(node_name, '.'))) {
299+
*ptr = '\0';
300+
}
301+
}
302+
}
303+
273304
/* Do we need to make a new node object? */
274305
if (NULL == (node = hostfile_lookup(updates, node_name))) {
275306
node = OBJ_NEW(orte_node_t);

orte/util/proc_info.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
* Copyright (c) 2012 Los Alamos National Security, LLC.
1414
* All rights reserved.
1515
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved
16+
* Copyright (c) 2016 IBM Corporation. All rights reserved.
1617
* $COPYRIGHT$
1718
*
1819
* Additional copyrights may follow
@@ -51,6 +52,7 @@
5152

5253
/* provide a connection to a reqd variable */
5354
extern bool orte_keep_fqdn_hostnames;
55+
extern bool orte_use_mixed_hostnames;
5456

5557
#define ORTE_NAME_INVALID {ORTE_JOBID_INVALID, ORTE_VPID_INVALID}
5658

@@ -173,7 +175,9 @@ int orte_proc_info(void)
173175
/* add this to our list of aliases */
174176
opal_argv_append_nosize(&orte_process_info.aliases, hostname);
175177

176-
if (!orte_keep_fqdn_hostnames) {
178+
// These two options should be mutually exclusive
179+
// Cannot specify orte_use_mixed_hostnames=true and orte_keep_fqdn_hostnames=true
180+
if (!orte_keep_fqdn_hostnames || orte_use_mixed_hostnames) {
177181
/* if the nodename is an IP address, do not mess with it! */
178182
if (0 == inet_pton(AF_INET, hostname, &buf) &&
179183
0 == inet_pton(AF_INET6, hostname, &buf)) {

0 commit comments

Comments
 (0)