Skip to content

Commit eb5faf5

Browse files
authored
Merge pull request #4630 from chu11/issue4593_per_resource_type_core
job-list: handle per-resource "cores" special cases
2 parents 26101d7 + abd28d9 commit eb5faf5

File tree

4 files changed

+111
-16
lines changed

4 files changed

+111
-16
lines changed

src/modules/job-list/job_data.c

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
#include "src/common/libczmqcontainers/czmq_containers.h"
2121
#include "src/common/librlist/rlist.h"
22+
#include "src/common/librlist/rnode.h"
2223
#include "src/common/libccan/ccan/str/str.h"
2324
#include "src/common/libjob/jj.h"
2425

@@ -220,6 +221,19 @@ static int parse_jobspec_ntasks (struct job *job, struct jj_counts *jj)
220221
job->ntasks = jj->nnodes * count;
221222
return 0;
222223
}
224+
if (streq (type, "core")) {
225+
if (jj->nnodes == 0)
226+
job->ntasks = jj->nslots * jj->slot_size * count;
227+
else {
228+
/* If nnodes > 0, ntasks can't be determined until nodes are
229+
* allocated and the number of cores on the node(s) is known.
230+
* Save the per-core task count so ntasks can be calculated
231+
* later, once R has been retrieved.
232+
*/
233+
job->ntasks_per_core_on_node_count = count;
234+
}
235+
return 0;
236+
}
223237
}
224238

225239
job->ntasks = jj->nslots;
@@ -350,6 +364,16 @@ int job_parse_R (struct job *job, const char *s)
350364
if (!(job->nodelist = hostlist_encode (hl)))
351365
goto nonfatal_error;
352366

367+
if (job->ntasks_per_core_on_node_count > 0) {
368+
int core_count = 0;
369+
struct rnode *rnode = zlistx_first (rl->nodes);
370+
while (rnode) {
371+
core_count += idset_count (rnode->cores->ids);
372+
rnode = zlistx_next (rl->nodes);
373+
}
374+
job->ntasks = core_count * job->ntasks_per_core_on_node_count;
375+
}
376+
353377
/* nonfatal error - invalid R, but we'll continue on. job listing
354378
* will get initialized data */
355379
nonfatal_error:

src/modules/job-list/job_data.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ struct job {
4747
const char *name;
4848
const char *queue;
4949
int ntasks;
50+
int ntasks_per_core_on_node_count; /* flag for ntasks calculation */
5051
double duration;
5152
int nnodes;
5253
char *ranks;

t/t2260-job-list.t

Lines changed: 86 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -711,6 +711,73 @@ test_expect_success HAVE_JQ 'flux job list outputs ntasks correctly (tasks-per-n
711711
echo $obj | jq -e ".ntasks == 6"
712712
'
713713

714+
# N.B. As of this writing, tasks-per-node uses
715+
# per-resource.type=node, but test per-resource.type=node directly
716+
# in case that changes in the future.
717+
test_expect_success HAVE_JQ 'flux job list outputs ntasks correctly (per-resource.type=node)' '
718+
nnodes=$(flux resource list -s up -no {nnodes}) &&
719+
ncores=$(flux resource list -s up -no {ncores}) &&
720+
extra=$((ncores / nnodes + 2)) &&
721+
jobid=$(flux mini submit --wait -N ${nnodes} -n ${ncores} \
722+
-o per-resource.type=node \
723+
-o per-resource.count=${extra} \
724+
hostname | flux job id) &&
725+
echo $jobid > taskcount6.id &&
726+
wait_jobid_state $jobid inactive &&
727+
obj=$(flux job list -s inactive | grep $jobid) &&
728+
expected=$((nnodes * extra)) &&
729+
echo ${expected} > per_resource_type_node_ntasks.exp &&
730+
echo $obj | jq -e ".ntasks == ${expected}"
731+
'
732+
733+
test_expect_success HAVE_JQ 'flux job list outputs ntasks correctly (cores / tasks-per-core)' '
734+
jobid=`flux mini submit --wait --cores=4 --tasks-per-core=2 hostname | flux job id` &&
735+
echo $jobid > taskcount7.id &&
736+
wait_jobid_state $jobid inactive &&
737+
obj=$(flux job list -s inactive | grep $jobid) &&
738+
echo $obj | jq -e ".ntasks == 8"
739+
'
740+
741+
test_expect_success HAVE_JQ 'flux job list outputs ntasks correctly (tasks / cores-per-task)' '
742+
jobid=$(flux mini submit --wait -n2 --cores-per-task=2 \
743+
-o per-resource.type=core \
744+
-o per-resource.count=2 \
745+
hostname | flux job id) &&
746+
echo $jobid > taskcount8.id &&
747+
wait_jobid_state $jobid inactive &&
748+
obj=$(flux job list -s inactive | grep $jobid) &&
749+
echo $obj | jq -e ".ntasks == 8"
750+
'
751+
752+
test_expect_success HAVE_JQ 'flux job list outputs ntasks correctly (nodes / tasks-per-core 2)' '
753+
nnodes=$(flux resource list -s up -no {nnodes}) &&
754+
ncores=$(flux resource list -s up -no {ncores}) &&
755+
jobid=`flux mini submit --wait -N ${nnodes} --tasks-per-core=2 hostname | flux job id` &&
756+
echo $jobid > taskcount9.id &&
757+
wait_jobid_state $jobid inactive &&
758+
expected=$((ncores * 2)) &&
759+
echo ${expected} > per_resource_type_core_ntasks1.exp &&
760+
obj=$(flux job list -s inactive | grep $jobid) &&
761+
echo $obj | jq -e ".ntasks == ${expected}"
762+
'
763+
764+
# N.B. As of this writing, tasks-per-core uses
765+
# per-resource.type=core, but test per-resource.type=core directly
766+
# in case that changes in the future.
767+
test_expect_success HAVE_JQ 'flux job list outputs ntasks correctly (cores / per-resource.type=core)' '
768+
ncores=$(flux resource list -s up -no {ncores}) &&
769+
jobid=$(flux mini submit --wait --cores=${ncores} \
770+
-o per-resource.type=core \
771+
-o per-resource.count=2 \
772+
hostname | flux job id) &&
773+
echo $jobid > taskcount10.id &&
774+
wait_jobid_state $jobid inactive &&
775+
obj=$(flux job list -s inactive | grep $jobid) &&
776+
expected=$((ncores * 2)) &&
777+
echo ${expected} > per_resource_type_core_ntasks2.exp &&
778+
echo $obj | jq -e ".ntasks == ${expected}"
779+
'
780+
714781
test_expect_success 'reload the job-list module' '
715782
flux module reload job-list
716783
'
@@ -721,6 +788,11 @@ test_expect_success HAVE_JQ 'verify task count preserved across restart' '
721788
jobid3=`cat taskcount3.id` &&
722789
jobid4=`cat taskcount4.id` &&
723790
jobid5=`cat taskcount5.id` &&
791+
jobid6=`cat taskcount6.id` &&
792+
jobid7=`cat taskcount7.id` &&
793+
jobid8=`cat taskcount8.id` &&
794+
jobid9=`cat taskcount9.id` &&
795+
jobid10=`cat taskcount10.id` &&
724796
obj=$(flux job list -s inactive | grep ${jobid1}) &&
725797
echo $obj | jq -e ".ntasks == 1" &&
726798
obj=$(flux job list -s inactive | grep ${jobid2}) &&
@@ -730,7 +802,20 @@ test_expect_success HAVE_JQ 'verify task count preserved across restart' '
730802
obj=$(flux job list -s inactive | grep ${jobid4}) &&
731803
echo $obj | jq -e ".ntasks == 3" &&
732804
obj=$(flux job list -s inactive | grep ${jobid5}) &&
733-
echo $obj | jq -e ".ntasks == 6"
805+
echo $obj | jq -e ".ntasks == 6" &&
806+
obj=$(flux job list -s inactive | grep ${jobid6}) &&
807+
expected=$(cat per_resource_type_node_ntasks.exp) &&
808+
echo $obj | jq -e ".ntasks == ${expected}" &&
809+
obj=$(flux job list -s inactive | grep ${jobid7}) &&
810+
echo $obj | jq -e ".ntasks == 8" &&
811+
obj=$(flux job list -s inactive | grep ${jobid8}) &&
812+
echo $obj | jq -e ".ntasks == 8" &&
813+
obj=$(flux job list -s inactive | grep ${jobid9}) &&
814+
expected=$(cat per_resource_type_core_ntasks1.exp) &&
815+
echo $obj | jq -e ".ntasks == ${expected}" &&
816+
obj=$(flux job list -s inactive | grep ${jobid10}) &&
817+
expected=$(cat per_resource_type_core_ntasks2.exp) &&
818+
echo $obj | jq -e ".ntasks == ${expected}"
734819
'
735820

736821
#

t/t2800-jobs-cmd.t

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1231,21 +1231,6 @@ test_expect_success HAVE_JQ 'flux jobs lists nnodes for pending jobs correctly'
12311231
test_cmp nnodesP.exp nnodesP.out
12321232
'
12331233

1234-
# over subscribe tasks onto nodes through workaround, ensure
1235-
# ntasks is larger than the tasks specified via -n option
1236-
test_expect_success 'flux jobs lists ntasks with per-resource type=node correctly' '
1237-
nnodes=$(flux resource list -s up -no {nnodes}) &&
1238-
ncores=$(flux resource list -s up -no {ncores}) &&
1239-
extra=$((ncores / nnodes + 2)) &&
1240-
id=$(flux mini submit -N ${nnodes} -n ${ncores} \
1241-
-o per-resource.type=node \
1242-
-o per-resource.count=${extra} \
1243-
/bin/true) &&
1244-
fj_wait_event ${id} clean &&
1245-
flux jobs -no "{ntasks}" ${id} > per_resource_ntasks.out &&
1246-
test $(cat per_resource_ntasks.out) -eq $((nnodes * extra))
1247-
'
1248-
12491234
#
12501235
# leave job cleanup to rc3
12511236
#

0 commit comments

Comments
 (0)