Skip to content

Commit f888fda

Browse files
committed
job-list: support hostlist constraint
Problem: It would be convenient to filter jobs based on the nodes they ran on. Add a constraint operator "hostlist" to filter on nodes within the job nodelist. Multiple nodes can be specified. Hostlists represented in RFC29 format are acceptable for input to the constraint.
1 parent 428defa commit f888fda

File tree

3 files changed

+113
-1
lines changed

3 files changed

+113
-1
lines changed

src/modules/job-list/match.c

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ typedef enum {
4646
MATCH_LESS_THAN = 4,
4747
} match_comparison_t;
4848

49+
/* Floor for the per-context limit on the number of hosts accepted in a
 * single "hostlist" constraint: match_ctx_create() raises max_hostlist
 * to this value when the instance size is smaller.
 */
#define MIN_MATCH_HOSTLIST 1024
50+
4951
struct timestamp_value {
5052
double t_value;
5153
match_timestamp_type_t t_type;
@@ -379,6 +381,82 @@ static struct list_constraint *create_results_constraint (struct match_ctx *mctx
379381
errp);
380382
}
381383

384+
static bool match_hostlist (struct list_constraint *c, const struct job *job)
385+
{
386+
struct hostlist *hl = zlistx_first (c->values);
387+
const char *host;
388+
/* nodelist may not exist if job never ran */
389+
if (!job->nodelist)
390+
return false;
391+
if (!job->nodelist_hl) {
392+
/* hack to remove const */
393+
struct job *jobtmp = (struct job *)job;
394+
if (!(jobtmp->nodelist_hl = hostlist_decode (job->nodelist)))
395+
return false;
396+
}
397+
host = hostlist_first (hl);
398+
while (host) {
399+
if (hostlist_find (job->nodelist_hl, host) >= 0)
400+
return true;
401+
host = hostlist_next (hl);
402+
}
403+
return false;
404+
}
405+
406+
/* Item destructor for a list of hostlists (zlistx_set_destructor style):
 * destroys the hostlist that *item points to and clears the slot.
 * A NULL item pointer is ignored.
 */
static void wrap_hostlist_destroy (void **item)
{
    if (!item)
        return;
    hostlist_destroy (*item);
    *item = NULL;
}
415+
416+
static struct list_constraint *create_hostlist_constraint (
417+
struct match_ctx *mctx,
418+
json_t *values,
419+
flux_error_t *errp)
420+
{
421+
struct list_constraint *c;
422+
struct hostlist *hl = NULL;
423+
json_t *entry;
424+
size_t index;
425+
426+
if (!(c = list_constraint_new (mctx,
427+
match_hostlist,
428+
wrap_hostlist_destroy,
429+
errp)))
430+
return NULL;
431+
/* Create a single hostlist if user specifies multiple nodes or
432+
* RFC29 hostlist range */
433+
if (!(hl = hostlist_create ()))
434+
goto error;
435+
json_array_foreach (values, index, entry) {
436+
if (!json_is_string (entry)) {
437+
errprintf (errp, "host value must be a string");
438+
goto error;
439+
}
440+
if (hostlist_append (hl, json_string_value (entry)) <= 0) {
441+
errprintf (errp, "host value not in valid Hostlist format");
442+
goto error;
443+
}
444+
}
445+
if (hostlist_count (hl) > mctx->max_hostlist) {
446+
errprintf (errp, "too many hosts specified");
447+
goto error;
448+
}
449+
if (!zlistx_add_end (c->values, hl)) {
450+
hostlist_destroy (hl);
451+
goto error;
452+
}
453+
return c;
454+
error:
455+
hostlist_destroy (hl);
456+
list_constraint_destroy (c);
457+
return NULL;
458+
}
459+
382460
static bool match_timestamp (struct list_constraint *c,
383461
const struct job *job)
384462
{
@@ -595,6 +673,8 @@ struct list_constraint *list_constraint_create (struct match_ctx *mctx,
595673
return create_states_constraint (mctx, values, errp);
596674
else if (streq (op, "results"))
597675
return create_results_constraint (mctx, values, errp);
676+
else if (streq (op, "hostlist"))
677+
return create_hostlist_constraint (mctx, values, errp);
598678
else if (streq (op, "t_submit")
599679
|| streq (op, "t_depend")
600680
|| streq (op, "t_run")
@@ -622,11 +702,41 @@ bool job_match (const struct job *job, struct list_constraint *constraint)
622702
/* Allocate a match context for flux handle 'h'.
 *
 * max_hostlist is initialized from flux_get_size() and then raised to
 * MIN_MATCH_HOSTLIST if it is smaller.
 *
 * Returns the new context, or NULL on failure.  When flux_get_size()
 * fails, the errno it left is preserved across cleanup.
 */
struct match_ctx *match_ctx_create (flux_t *h)
{
    struct match_ctx *ctx = calloc (1, sizeof (*ctx));

    if (!ctx)
        return NULL;
    ctx->h = h;

    if (flux_get_size (ctx->h, &ctx->max_hostlist) < 0) {
        int saved_errno = errno;

        match_ctx_destroy (ctx);
        errno = saved_errno;
        return NULL;
    }

    /* Notes:
     *
     * A hostlist constraint match must not be able to DoS this module,
     * so the number of hosts allowed within a single hostlist
     * constraint is capped.
     *
     * Under normal operating conditions, the number of brokers is the
     * most likely maximum.  There are corner cases, though: for
     * example, an instance may be reconfigured to be smaller, which is
     * not uncommon towards a cluster's end of life, when hardware is
     * beginning to die.
     *
     * As a compromise, when the number of brokers is below the defined
     * minimum MIN_MATCH_HOSTLIST, max_hostlist is raised to that
     * minimum.
     */
    if (ctx->max_hostlist < MIN_MATCH_HOSTLIST)
        ctx->max_hostlist = MIN_MATCH_HOSTLIST;

    return ctx;
}
631741

632742
void match_ctx_destroy (struct match_ctx *mctx)

src/modules/job-list/match.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222

2323
struct match_ctx {
2424
flux_t *h;
25+
uint32_t max_hostlist; /* for hostlist match */
2526
};
2627

2728
struct match_ctx *match_ctx_create (flux_t *h);

src/modules/job-list/state_match.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -366,7 +366,8 @@ struct state_constraint *state_constraint_create (json_t *constraint, flux_error
366366
}
367367
if (streq (op, "userid")
368368
|| streq (op, "name")
369-
|| streq (op, "queue"))
369+
|| streq (op, "queue")
370+
|| streq (op, "hostlist"))
370371
return state_constraint_new (match_maybe, NULL, errp);
371372
else if (streq (op, "results"))
372373
return state_constraint_new (match_result, NULL, errp);

0 commit comments

Comments
 (0)