Skip to content

Commit 5fe073b

Browse files
authored
Merge pull request #5735 from grondo/taskmap-raw
libtaskmap: support decode of raw (semicolon-delimited) taskmaps
2 parents e71f17b + 6778358 commit 5fe073b

File tree

4 files changed

+240
-14
lines changed

4 files changed

+240
-14
lines changed

doc/man1/flux-job.rst

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -349,9 +349,12 @@ support task mapping formats:
349349
.. option:: --to=raw|pmi|multiline
350350

351351
Convert the taskmap to *raw* or *pmi* formats (described in RFC 34), or
352-
*multiline* which prints the node ID of each task, one per line.
352+
*multiline* which prints the node ID of each task, one per line. The
353+
default behavior is to print the RFC 34 taskmap. This option can be useful
354+
to convert between mapping forms, since :program:`flux job taskmap` can
355+
take a raw, pmi, or RFC 34 task map on the command line.
353356

354-
One one of the above options may be used per call.
357+
Only one of the above options may be used per call.
355358

356359
timeleft
357360
--------

src/common/libtaskmap/taskmap.c

Lines changed: 189 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -415,6 +415,188 @@ static struct taskmap *taskmap_decode_pmi (const char *s, flux_error_t *errp)
415415
return NULL;
416416
}
417417

418+
struct raw_task {
419+
int taskid;
420+
int nodeid;
421+
int repeat;
422+
};
423+
424+
static void item_destructor (void **item)
425+
{
426+
if (item) {
427+
free (*item);
428+
*item = NULL;
429+
}
430+
}
431+
432+
static int taskid_cmp (const void *a, const void *b)
433+
{
434+
const struct raw_task *t1 = a;
435+
const struct raw_task *t2 = b;
436+
return (t1->taskid - t2->taskid);
437+
}
438+
439+
static int raw_task_append (zlistx_t *l, int taskid, int nodeid, int repeat)
440+
{
441+
struct raw_task *t = calloc (1, sizeof (*t));
442+
if (!t)
443+
return -1;
444+
t->taskid = taskid;
445+
t->nodeid = nodeid;
446+
t->repeat = repeat;
447+
if (!zlistx_add_end (l, t)) {
448+
free (t);
449+
return -1;
450+
}
451+
return 0;
452+
}
453+
454+
static zlistx_t *raw_task_list_create (void)
455+
{
456+
zlistx_t *l;
457+
if (!(l = zlistx_new ())) {
458+
errno = ENOMEM;
459+
return NULL;
460+
}
461+
zlistx_set_destructor (l, item_destructor);
462+
zlistx_set_comparator (l, &taskid_cmp);
463+
return l;
464+
}
465+
466+
static int raw_task_list_append (zlistx_t *l,
467+
const char *s,
468+
int nodeid,
469+
flux_error_t *errp)
470+
{
471+
int rc = -1;
472+
unsigned int id;
473+
idset_error_t error;
474+
struct idset *ids;
475+
476+
if (!(ids = idset_decode_ex (s, -1, 0, IDSET_FLAG_AUTOGROW, &error))) {
477+
errprintf (errp, "%s", error.text);
478+
goto error;
479+
}
480+
id = idset_first (ids);
481+
while (id != IDSET_INVALID_ID) {
482+
unsigned int next = idset_next (ids, id);
483+
int repeat = 1;
484+
while (next == id + repeat) {
485+
next = idset_next (ids, next);
486+
repeat++;
487+
}
488+
if (raw_task_append (l, id, nodeid, repeat) < 0) {
489+
errprintf (errp, "Out of memory");
490+
goto error;
491+
}
492+
id = next;
493+
}
494+
rc = 0;
495+
error:
496+
idset_destroy (ids);
497+
return rc;
498+
}
499+
500+
static int raw_task_check (struct raw_task *a,
501+
struct raw_task *b,
502+
flux_error_t *errp)
503+
{
504+
struct raw_task t_init = { .taskid = -1, .repeat = 1 };
505+
int start, end1, end2, end;
506+
507+
if (a == NULL)
508+
a = &t_init;
509+
510+
/* Note: a->taskid <= b->taskid since taskmap_decode_raw() sorts
511+
* raw_task objects.
512+
*/
513+
start = b->taskid;
514+
end1 = a->taskid + a->repeat - 1;
515+
end2 = b->taskid + b->repeat - 1;
516+
end = end1 <= end2 ? end1 : end2;
517+
518+
/* If end - start is nonzero then we have overlap. report it.
519+
*/
520+
int overlap = end - start;
521+
if (overlap >= 0) {
522+
/* taskid overlap detected, report as error
523+
*/
524+
if (overlap == 0)
525+
errprintf (errp, "duplicate taskid specified: %d", start);
526+
else
527+
errprintf (errp, "duplicate taskids specified: %d-%d", start, end);
528+
return -1;
529+
}
530+
/* Now check that tasks are consecutive. It is an error if not since
531+
* holes in taskids in a taskmap are not allowed
532+
*/
533+
if (overlap != -1) {
534+
if (overlap == -2)
535+
return errprintf (errp, "missing taskid: %d", end + 1);
536+
else
537+
return errprintf (errp,
538+
"missing taskids: %d-%d",
539+
end + 1,
540+
end - overlap - 1);
541+
}
542+
return 0;
543+
}
544+
545+
static struct taskmap *taskmap_decode_raw (const char *s, flux_error_t *errp)
546+
{
547+
char *tok;
548+
char *p;
549+
char *q;
550+
char *cpy = NULL;
551+
struct taskmap *map = NULL;
552+
zlistx_t *l = NULL;
553+
int nodeid = 0;
554+
struct raw_task *t, *prev;
555+
556+
if (!s || strlen (s) == 0) {
557+
errprintf (errp, "Invalid argument");
558+
return NULL;
559+
}
560+
if (!(map = taskmap_create ())
561+
|| !(cpy = strdup (s))
562+
|| !(l = raw_task_list_create ())) {
563+
errprintf (errp, "Out of memory");
564+
goto error;
565+
}
566+
567+
p = cpy;
568+
569+
while ((tok = strtok_r (p, ";", &q))) {
570+
if (raw_task_list_append (l, tok, nodeid++, errp) < 0)
571+
goto error;
572+
p = NULL;
573+
}
574+
575+
/* sort by taskid */
576+
zlistx_sort (l);
577+
t = zlistx_first (l);
578+
prev = NULL;
579+
580+
while (t) {
581+
if (raw_task_check (prev, t, errp) < 0)
582+
goto error;
583+
if (taskmap_append (map, t->nodeid, 1, t->repeat) < 0) {
584+
errprintf (errp, "taskmap_append: %s", strerror (errno));
585+
goto error;
586+
}
587+
prev = t;
588+
t = zlistx_next (l);
589+
}
590+
zlistx_destroy (&l);
591+
free (cpy);
592+
return map;
593+
error:
594+
zlistx_destroy (&l);
595+
taskmap_destroy (map);
596+
free (cpy);
597+
return NULL;
598+
}
599+
418600
struct taskmap *taskmap_decode (const char *s, flux_error_t *errp)
419601
{
420602
struct taskmap *map = NULL;
@@ -435,11 +617,17 @@ struct taskmap *taskmap_decode (const char *s, flux_error_t *errp)
435617
|| strstr (s, "vector,"))
436618
return taskmap_decode_pmi (s, errp);
437619

620+
/* A string without special characters might be a raw taskmap:
621+
*/
622+
if (!strpbrk (s, "({[]})"))
623+
return taskmap_decode_raw (s, errp);
624+
625+
/* O/w, decode as RFC 34 Taskmap
626+
*/
438627
if (!(o = json_loads (s, JSON_DECODE_ANY, &error))) {
439628
errprintf (errp, "%s", error.text);
440629
goto out;
441630
}
442-
443631
map = taskmap_decode_json (o, errp);
444632
out:
445633
json_decref (o);
@@ -675,14 +863,6 @@ static char *list_join (zlistx_t *l, char *sep)
675863
return result;
676864
}
677865

678-
static void item_destructor (void **item)
679-
{
680-
if (item) {
681-
free (*item);
682-
*item = NULL;
683-
}
684-
}
685-
686866
static char *taskmap_encode_raw (const struct taskmap *map, int flags)
687867
{
688868
char *result = NULL;

src/common/libtaskmap/taskmap.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,9 @@ void taskmap_destroy (struct taskmap *map);
4141
int taskmap_append (struct taskmap *map, int nodeid, int nnodes, int ppn);
4242

4343
/* Decode string 'map' into taskmap object.
44-
* The string may be a JSON array, RFC 34 wrapped object, or a mapping
45-
* encoded in PMI-1 PMI_process_mapping form described in RFC 13.
44+
* The string may be a JSON array, RFC 34 wrapped object, a mapping
45+
* encoded in PMI-1 PMI_process_mapping form described in RFC 13, or
46+
* a raw, semicolon-delimited list of taskids.
4647
* Returns taskmap on success, or NULL on error with error string in 'errp'.
4748
*/
4849
struct taskmap *taskmap_decode (const char *map, flux_error_t *errp);

src/common/libtaskmap/test/taskmap.c

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,22 @@ static void rfc34_tests ()
8585
"taskmap is known");
8686
taskmap_destroy (map);
8787
free (s);
88+
89+
/* Try raw back to taskmap:
90+
*/
91+
map = taskmap_decode (t->expected, NULL);
92+
ok (map != NULL,
93+
"taskmap_decode (%s)",
94+
t->expected);
95+
if (map) {
96+
ok ((s = taskmap_encode (map, 0)) != NULL,
97+
"taskmap_encode works");
98+
is (s, t->taskmap,
99+
"taskmap=%s",
100+
s);
101+
taskmap_destroy (map);
102+
free (s);
103+
}
88104
}
89105
}
90106

@@ -272,7 +288,6 @@ static void main_tests ()
272288

273289
static const char *invalid[] = {
274290
"}",
275-
"42",
276291
"{}",
277292
"{\"version\":1}",
278293
"{\"version\":1,\"map\":{}}",
@@ -553,6 +568,32 @@ void test_deranged (void)
553568
taskmap_destroy (map);
554569
}
555570

571+
struct test_vector raw_tests[] = {
572+
{ "-1", "error parsing range '-1'" },
573+
{ "1-3;a-b", "error parsing range 'a-b'" },
574+
{ "1,1", "range '1' is out of order" },
575+
{ "0-1;1-2", "duplicate taskid specified: 1" },
576+
{ "5-15;0-10", "duplicate taskids specified: 5-10" },
577+
{ "1", "missing taskid: 0" },
578+
{ "3-4;0-1", "missing taskid: 2" },
579+
{ "0-1;10-11", "missing taskids: 2-9" },
580+
{ NULL, NULL },
581+
};
582+
583+
static void test_raw_decode_errors (void)
584+
{
585+
struct test_vector *t;
586+
for (t = &raw_tests[0]; t->taskmap != NULL; t++) {
587+
flux_error_t error;
588+
ok (taskmap_decode (t->taskmap, &error) == NULL,
589+
"taskmap_decode (%s) fails",
590+
t->taskmap);
591+
is (error.text, t->expected,
592+
"taskmap_decode: %s",
593+
error.text);
594+
}
595+
}
596+
556597
int main (int ac, char **av)
557598
{
558599
plan (NO_PLAN);
@@ -565,6 +606,7 @@ int main (int ac, char **av)
565606
append_cyclic_one ();
566607
test_check ();
567608
test_deranged ();
609+
test_raw_decode_errors ();
568610
done_testing ();
569611
}
570612

0 commit comments

Comments
 (0)