Skip to content

Commit 894cfdd

Browse files
authored
Merge pull request #4542 from chu11/issue4530_flux_jobs_nnodes
job-list: return nnodes if jobspec specifies nodes
2 parents 25e865b + bc64872 commit 894cfdd

File tree

4 files changed

+61
-19
lines changed

4 files changed

+61
-19
lines changed

src/modules/job-list/job_state.c

Lines changed: 24 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -376,8 +376,7 @@ static int parse_res_level (struct list_ctx *ctx,
376376

377377
/* Return basename of path if there is a '/' in path. Otherwise return
378378
* full path */
379-
static const char *
380-
parse_job_name (const char *path)
379+
static const char *parse_job_name (const char *path)
381380
{
382381
char *p = strrchr (path, '/');
383382
if (p) {
@@ -398,6 +397,7 @@ static int jobspec_parse (struct list_ctx *ctx,
398397
json_error_t error;
399398
json_t *jobspec = NULL;
400399
json_t *tasks, *resources, *command, *jobspec_job = NULL;
400+
struct res_level res[3];
401401
int rc = -1;
402402

403403
if (!(jobspec = json_loads (s, 0, &error))) {
@@ -489,13 +489,33 @@ static int jobspec_parse (struct list_ctx *ctx,
489489
goto nonfatal_error;
490490
}
491491

492+
/* For jobspec version 1, expect either:
493+
* - node->slot->core->NIL
494+
* - slot->core->NIL
495+
*/
496+
memset (res, 0, sizeof (res));
497+
if (parse_res_level (ctx, job, resources, &res[0]) < 0)
498+
goto nonfatal_error;
499+
if (res[0].with && parse_res_level (ctx, job, res[0].with, &res[1]) < 0)
500+
goto nonfatal_error;
501+
if (res[1].with && parse_res_level (ctx, job, res[1].with, &res[2]) < 0)
502+
goto nonfatal_error;
503+
504+
/* Set job->nnodes if available. In jobspec version 1, only if
505+
* resources are listed as node->slot->core->NIL
506+
*/
507+
if (res[0].type != NULL && !strcmp (res[0].type, "node")
508+
&& res[1].type != NULL && !strcmp (res[1].type, "slot")
509+
&& res[2].type != NULL && !strcmp (res[2].type, "core")
510+
&& res[2].with == NULL)
511+
job->nnodes = res[0].count;
512+
492513
/* Set job->ntasks
493514
*/
494515
if (json_unpack_ex (tasks, NULL, 0,
495516
"[{s:{s:i}}]",
496517
"count", "total", &job->ntasks) < 0) {
497518
int per_slot, slot_count = 0;
498-
struct res_level res[3];
499519

500520
if (json_unpack_ex (tasks, &error, 0,
501521
"[{s:{s:i}}]",
@@ -511,18 +531,6 @@ static int jobspec_parse (struct list_ctx *ctx,
511531
__FUNCTION__, (uintmax_t)job->id, per_slot);
512532
goto nonfatal_error;
513533
}
514-
/* For jobspec version 1, expect either:
515-
* - node->slot->core->NIL
516-
* - slot->core->NIL
517-
* Set job->slot_count and job->cores_per_slot.
518-
*/
519-
memset (res, 0, sizeof (res));
520-
if (parse_res_level (ctx, job, resources, &res[0]) < 0)
521-
goto nonfatal_error;
522-
if (res[0].with && parse_res_level (ctx, job, res[0].with, &res[1]) < 0)
523-
goto nonfatal_error;
524-
if (res[1].with && parse_res_level (ctx, job, res[1].with, &res[2]) < 0)
525-
goto nonfatal_error;
526534
if (res[0].type != NULL && !strcmp (res[0].type, "slot")
527535
&& res[1].type != NULL && !strcmp (res[1].type, "core")
528536
&& res[1].with == NULL) {
@@ -549,7 +557,7 @@ static int jobspec_parse (struct list_ctx *ctx,
549557
}
550558

551559
/* nonfatal error - jobspec illegal, but we'll continue on. job
552-
* listing will get initialized data */
560+
* listing will return whatever data is available */
553561
nonfatal_error:
554562
rc = 0;
555563
error:

src/modules/job-list/job_util.c

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -91,9 +91,9 @@ static int store_attr (struct job *job,
9191
val = json_integer (job->ntasks);
9292
}
9393
else if (!strcmp (attr, "nnodes")) {
94-
/* job->nnodes potentially < 0 if R invalid */
95-
if (!(job->states_mask & FLUX_JOB_STATE_RUN)
96-
|| job->nnodes < 0)
94+
/* job->nnodes < 0 if not set yet or R invalid; it may be set in
95+
* DEPEND or RUN state */
96+
if (job->nnodes < 0)
9797
return 0;
9898
val = json_integer (job->nnodes);
9999
}

t/t2260-job-list.t

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -719,6 +719,22 @@ test_expect_success HAVE_JQ 'flux job list outputs nnodes/ranks/nodelist correct
719719
echo $obj | jq -e ".nodelist == \"${nodes}\""
720720
'
721721

722+
# use flux queue to ensure jobs stay in pending state
723+
test_expect_success HAVE_JQ 'flux job list lists nnodes for pending jobs correctly' '
724+
flux queue stop &&
725+
id1=$(flux mini submit -N1 hostname | flux job id) &&
726+
echo ${id1} >> nnodes.ids &&
727+
id2=$(flux mini submit -N3 hostname | flux job id) &&
728+
echo ${id2} >> nnodes.ids &&
729+
flux job list -s pending | grep ${id1} &&
730+
flux job list -s pending | grep ${id2} &&
731+
flux job list-ids ${id1} | jq -e ".nnodes == 1" &&
732+
flux job list-ids ${id2} | jq -e ".nnodes == 3" &&
733+
flux job cancel ${id1} &&
734+
flux job cancel ${id2} &&
735+
flux queue start
736+
'
737+
722738
#
723739
# job success
724740
#

t/t2800-jobs-cmd.t

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1135,6 +1135,24 @@ test_expect_success HAVE_JQ 'flux jobs works on job with illegal R' '
11351135
test_cmp list_illegal_R.out list_illegal_R.exp
11361136
'
11371137

1138+
#
1139+
# special tests
1140+
#
1141+
1142+
# use flux queue to ensure jobs stay in pending state
1143+
test_expect_success HAVE_JQ 'flux jobs lists nnodes for pending jobs correctly' '
1144+
flux queue stop &&
1145+
id1=$(flux mini submit -N1 hostname) &&
1146+
id2=$(flux mini submit -N3 hostname) &&
1147+
flux jobs -no "{state},{nnodes},{nnodes:h}" ${id1} ${id2}> nnodesP.out &&
1148+
echo "SCHED,1,1" >> nnodesP.exp &&
1149+
echo "SCHED,3,3" >> nnodesP.exp &&
1150+
flux job cancel ${id1} &&
1151+
flux job cancel ${id2} &&
1152+
flux queue start &&
1153+
test_cmp nnodesP.exp nnodesP.out
1154+
'
1155+
11381156
#
11391157
# leave job cleanup to rc3
11401158
#

0 commit comments

Comments
 (0)