Skip to content

Commit e75e4e1

Browse files
ggouaillardet authored and Scott Miller committed
regx/reverse: add the reverse component
Search for the digits to be compressed starting from the end of the node names. For example, if the nodelist is c712f6n01,c712f6n02,c712f6n03, the regx/fwd component generates c[3:712]f6n01,c[3:712]f6n02,c[3:712]f6n03@(3), whereas the regx/reverse component generates c712f6n[2:1-3]@0(3), which is a better fit here. Josh Hursey authored the changes and must be credited. Signed-off-by: Gilles Gouaillardet <[email protected]> (cherry picked from commit 4130c93)
1 parent b267915 commit e75e4e1

File tree

1 file changed

+288
-31
lines changed

1 file changed

+288
-31
lines changed
Lines changed: 288 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
/*
22
* Copyright (c) 2016-2018 Intel, Inc. All rights reserved.
3+
* Copyright (c) 2018 IBM Corporation. All rights reserved.
4+
* Copyright (c) 2018 Research Organization for Information Science
5+
* and Technology (RIST). All rights reserved.
36
* $COPYRIGHT$
47
*
58
* Additional copyrights may follow
@@ -23,52 +26,306 @@
2326

2427
#include "orte/runtime/orte_globals.h"
2528
#include "orte/util/name_fns.h"
29+
#include "orte/util/show_help.h"
30+
#include "orte/mca/errmgr/errmgr.h"
31+
#include "orte/mca/rmaps/base/base.h"
32+
#include "orte/mca/routed/routed.h"
2633
#include "orte/mca/regx/base/base.h"
2734

2835
#include "regx_reverse.h"
2936

3037
static int nidmap_create(opal_pointer_array_t *pool, char **regex);
31-
static int nidmap_parse(char *regex);
32-
static int encode_nodemap(opal_buffer_t *buffer);
33-
static int decode_daemon_nodemap(opal_buffer_t *buffer);
34-
static int generate_ppn(orte_job_t *jdata, char **ppn);
35-
static int parse_ppn(orte_job_t *jdata, char *ppn);
3638

3739
orte_regx_base_module_t orte_regx_reverse_module = {
3840
.nidmap_create = nidmap_create,
39-
.nidmap_parse = nidmap_parse,
40-
.encode_nodemap = encode_nodemap,
41-
.decode_daemon_nodemap = decode_daemon_nodemap,
42-
.generate_ppn = generate_ppn,
43-
.parse_ppn = parse_ppn
41+
.nidmap_parse = orte_regx_base_nidmap_parse,
42+
.extract_node_names = orte_regx_base_extract_node_names,
43+
.encode_nodemap = orte_regx_base_encode_nodemap,
44+
.decode_daemon_nodemap = orte_regx_base_decode_daemon_nodemap,
45+
.generate_ppn = orte_regx_base_generate_ppn,
46+
.parse_ppn = orte_regx_base_parse_ppn
4447
};
4548

4649
/**
 * Build the compressed "<node-names>@<daemon-vpids>" string for the
 * nodes held in 'pool'.
 *
 * Node names are compressed by scanning each name from its END: the
 * trailing run of digits becomes the node number and everything in
 * front of it is the prefix.  Names that share the same prefix and the
 * same number of digits are folded into "prefix[numdigits:ranges]",
 * e.g. c712f6n01,c712f6n02,c712f6n03 -> c712f6n[2:1-3].  Names that
 * cannot be compressed are emitted verbatim: names made only of digits,
 * names with no trailing digits, and names whose prefix does not fit
 * in ORTE_MAX_NODE_PREFIX.
 *
 * Daemon vpids are run-length encoded as "first(count)" (or just
 * "first" for a single entry); a node with no daemon is recorded
 * with the UINT32_MAX marker.
 *
 * @param pool   pointer array of orte_node_t; NULL slots are skipped
 * @param regex  on success receives a newly allocated string that the
 *               caller must free; left untouched on failure
 *
 * @return ORTE_SUCCESS, ORTE_ERR_OUT_OF_RESOURCE if a string could not
 *         be allocated, or ORTE_ERR_BAD_PARAM if the pool produced
 *         nothing to encode.
 */
static int nidmap_create(opal_pointer_array_t *pool, char **regex)
{
    char *node;
    char prefix[ORTE_MAX_NODE_PREFIX];
    int i, n, len, ret, startnum, nodenum, numdigits;
    int rc = ORTE_SUCCESS;
    bool found;
    char *nodenames = NULL;
    orte_regex_node_t *ndreg;
    orte_regex_range_t *range, *rng;
    opal_list_t nodenms, dvpids;
    opal_list_item_t *item, *itm2;
    char **regexargs = NULL, *tmp = NULL, *tmp2 = NULL;
    orte_node_t *nptr;
    orte_vpid_t vpid;

    OBJ_CONSTRUCT(&nodenms, opal_list_t);
    OBJ_CONSTRUCT(&dvpids, opal_list_t);

    rng = NULL;
    for (n=0; n < pool->size; n++) {
        if (NULL == (nptr = (orte_node_t*)opal_pointer_array_get_item(pool, n))) {
            continue;
        }
        /* if no daemon has been assigned, then this node is not being used */
        if (NULL == nptr->daemon) {
            vpid = UINT32_MAX;  // indicates no daemon assigned
        } else {
            vpid = nptr->daemon->name.vpid;
        }
        /* deal with the daemon vpid - extend the current range when this
         * entry continues it (another "no daemon" marker after a marker
         * range, or the next consecutive vpid after a real range),
         * otherwise start another range */
        if (NULL != rng &&
            ((UINT32_MAX == vpid && -1 == rng->vpid) ||
             (UINT32_MAX != vpid && -1 != rng->vpid &&
              vpid == (orte_vpid_t)(rng->vpid + rng->cnt)))) {
            rng->cnt++;
        } else {
            rng = OBJ_NEW(orte_regex_range_t);
            rng->vpid = vpid;
            rng->cnt = 1;
            opal_list_append(&dvpids, &rng->super);
        }

        node = nptr->name;
        opal_output_verbose(5, orte_regx_base_framework.framework_output,
                            "%s PROCESS NODE <%s>",
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                            node);

        /* Valid hostname characters are:
         * - ascii letters, digits, and the '-' character.
         * Determine the prefix in reverse to better support hostnames like:
         * c712f6n01, c699c086 where there are sets of digits, and the lowest
         * set changes most frequently.
         */
        len = strlen(node);
        startnum = -1;
        numdigits = 0;
        memset(prefix, 0, ORTE_MAX_NODE_PREFIX);
        /* count the trailing digits; the cast keeps isdigit() defined
         * for bytes with the high bit set */
        for (i = len - 1; i >= 0 && isdigit((unsigned char)node[i]); i--) {
            numdigits++;
        }
        /* node[0..i] is now the prefix (i is -1 for an all-digit name).
         * Only copy it when it fits, terminator included. */
        if (0 <= i && i + 1 < ORTE_MAX_NODE_PREFIX) {
            memcpy(prefix, node, i + 1);
            if (0 < numdigits) {
                /* there really is a number to compress */
                startnum = i + 1;
            }
        }

        opal_output_verbose(5, orte_regx_base_framework.framework_output,
                            "%s PROCESS NODE <%s> : reverse / prefix \"%s\" / numdigits %d",
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                            node, prefix, numdigits);

        if (startnum < 0) {
            /* can't compress this name - just add it to the list */
            ndreg = OBJ_NEW(orte_regex_node_t);
            ndreg->prefix = strdup(node);
            opal_list_append(&nodenms, &ndreg->super);
            continue;
        }
        /* convert the digits - they run to the end of the name, so
         * there is never a suffix to record */
        nodenum = strtol(&node[startnum], NULL, 10);

        /* is this node name already on our list? */
        found = false;
        for (item = opal_list_get_first(&nodenms);
             item != opal_list_get_end(&nodenms);
             item = opal_list_get_next(item)) {
            ndreg = (orte_regex_node_t*)item;
            if (0 == opal_list_get_size(&ndreg->ranges)) {
                /* verbatim (uncompressed) entry - never merge into it */
                continue;
            }
            if (NULL == ndreg->prefix || 0 != strcmp(prefix, ndreg->prefix)) {
                continue;
            }
            if (numdigits != ndreg->num_digits) {
                continue;
            }
            /* found a match - flag it */
            found = true;
            /* get the last range on this nodeid - we do this
             * to preserve order
             */
            range = (orte_regex_range_t*)opal_list_get_last(&ndreg->ranges);
            if (nodenum != (range->vpid + range->cnt)) {
                /* out of sequence - start a new range */
                range = OBJ_NEW(orte_regex_range_t);
                range->vpid = nodenum;
                range->cnt = 1;
                opal_list_append(&ndreg->ranges, &range->super);
            } else {
                /* everything matches - just increment the cnt */
                range->cnt++;
            }
            break;
        }
        if (!found) {
            /* need to add it */
            ndreg = OBJ_NEW(orte_regex_node_t);
            ndreg->prefix = strdup(prefix);
            ndreg->num_digits = numdigits;
            opal_list_append(&nodenms, &ndreg->super);
            /* record the first range for this nodeid - we took
             * care of names we can't compress above
             */
            range = OBJ_NEW(orte_regex_range_t);
            range->vpid = nodenum;
            range->cnt = 1;
            opal_list_append(&ndreg->ranges, &range->super);
        }
    }

    /* begin constructing the regular expression */
    while (NULL != (item = opal_list_remove_first(&nodenms))) {
        ndreg = (orte_regex_node_t*)item;

        /* if no ranges, then just add the name */
        if (0 == opal_list_get_size(&ndreg->ranges)) {
            if (NULL != ndreg->prefix) {
                /* solitary node */
                opal_argv_append_nosize(&regexargs, ndreg->prefix);
            }
            OBJ_RELEASE(ndreg);
            continue;
        }
        /* start the regex for this nodeid with the prefix */
        ret = asprintf(&tmp, "%s[%d:",
                       (NULL != ndreg->prefix) ? ndreg->prefix : "",
                       ndreg->num_digits);
        if (0 > ret) {
            /* tmp is undefined after a failed asprintf */
            tmp = NULL;
            OBJ_RELEASE(ndreg);
            rc = ORTE_ERR_OUT_OF_RESOURCE;
            goto cleanup;
        }
        /* add the ranges */
        while (NULL != (itm2 = opal_list_remove_first(&ndreg->ranges))) {
            range = (orte_regex_range_t*)itm2;
            if (1 == range->cnt) {
                ret = asprintf(&tmp2, "%s%u,", tmp, range->vpid);
            } else {
                ret = asprintf(&tmp2, "%s%u-%u,", tmp, range->vpid, range->vpid + range->cnt - 1);
            }
            OBJ_RELEASE(range);
            free(tmp);
            tmp = NULL;
            if (0 > ret) {
                OBJ_RELEASE(ndreg);
                rc = ORTE_ERR_OUT_OF_RESOURCE;
                goto cleanup;
            }
            tmp = tmp2;
        }
        /* replace the final comma */
        tmp[strlen(tmp)-1] = ']';
        opal_argv_append_nosize(&regexargs, tmp);
        free(tmp);
        tmp = NULL;
        OBJ_RELEASE(ndreg);
    }

    /* assemble final result */
    nodenames = opal_argv_join(regexargs, ',');

    /* do the same for the vpids */
    while (NULL != (item = opal_list_remove_first(&dvpids))) {
        rng = (orte_regex_range_t*)item;
        /* every entry after the first is comma-separated from what
         * has been built so far */
        if (1 < rng->cnt) {
            ret = asprintf(&tmp2, "%s%s%u(%u)",
                           (NULL != tmp) ? tmp : "",
                           (NULL != tmp) ? "," : "",
                           rng->vpid, rng->cnt);
        } else {
            ret = asprintf(&tmp2, "%s%s%u",
                           (NULL != tmp) ? tmp : "",
                           (NULL != tmp) ? "," : "",
                           rng->vpid);
        }
        OBJ_RELEASE(rng);
        free(tmp);
        tmp = NULL;
        if (0 > ret) {
            rc = ORTE_ERR_OUT_OF_RESOURCE;
            goto cleanup;
        }
        tmp = tmp2;
    }

    /* an empty pool leaves nothing to encode - do not hand NULL to "%s" */
    if (NULL == nodenames || NULL == tmp) {
        rc = ORTE_ERR_BAD_PARAM;
        goto cleanup;
    }

    /* now concatenate the results into one string */
    if (0 > asprintf(&tmp2, "%s@%s", nodenames, tmp)) {
        rc = ORTE_ERR_OUT_OF_RESOURCE;
        goto cleanup;
    }
    *regex = tmp2;

cleanup:
    free(tmp);
    free(nodenames);
    if (NULL != regexargs) {
        opal_argv_free(regexargs);
    }
    /* releases whatever is still queued (only on the error paths) */
    OPAL_LIST_DESTRUCT(&nodenms);
    OPAL_LIST_DESTRUCT(&dvpids);
    return rc;
}

0 commit comments

Comments
 (0)