Skip to content

Commit 3de28df

Browse files
committed
flux-mini: submit,run: add per-resource allocation options
Problem: A common use case for simple run and submit commands is to allocate a set of nodes and run a set number of tasks per node, but this is not easily possible with flux-mini run and submit. Add a set of "per resource" allocation options to flux-mini run,submit. If any of these options are used, then the command uses the new JobspecV1.per_resource() constructor to build the submitted jobspec. This lets users easily decide if they want to run a set number of tasks with a given amount of resources per task, or allocate a set of resources with a given number of tasks per resource.
1 parent ce49cd8 commit 3de28df

File tree

1 file changed

+119
-11
lines changed

1 file changed

+119
-11
lines changed

src/cmd/flux-mini.py

Lines changed: 119 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,10 @@ class Xcmd:
228228
"nodes": "-N",
229229
"cores_per_task": "-c",
230230
"gpus_per_task": "-g",
231+
"cores": "--cores=",
232+
"tasks_per_node": "--tasks-per-node=",
233+
"tasks_per_core": "--tasks-per-core=",
234+
"gpus_per_node": "--gpus-per-node=",
231235
"time_limit": "-t",
232236
"env": "--env=",
233237
"env_file": "--env-file=",
@@ -745,7 +749,6 @@ def __init__(self):
745749
"-c",
746750
"--cores-per-task",
747751
metavar="N",
748-
default=1,
749752
help="Number of cores to allocate per task",
750753
)
751754
group.add_argument(
@@ -754,6 +757,31 @@ def __init__(self):
754757
metavar="N",
755758
help="Number of GPUs to allocate per task",
756759
)
760+
group = self.parser.add_argument_group(
761+
"Per resource options",
762+
"The following options allow per-resource specification of "
763+
+ "tasks, and should not be used with per-task options above",
764+
)
765+
group.add_argument(
766+
"--cores",
767+
metavar="N",
768+
help="Request a total number of cores",
769+
)
770+
group.add_argument(
771+
"--tasks-per-node",
772+
metavar="N",
773+
help="Force number of tasks per node",
774+
)
775+
group.add_argument(
776+
"--tasks-per-core",
777+
metavar="N",
778+
help="Force number of tasks per core",
779+
)
780+
group.add_argument(
781+
"--gpus-per-node",
782+
metavar="N",
783+
help="Request a number of GPUs per node with --nodes",
784+
)
757785
self.parser.add_argument(
758786
"-v",
759787
"--verbose",
@@ -762,24 +790,28 @@ def __init__(self):
762790
help="Increase verbosity on stderr (multiple use OK)",
763791
)
764792

793+
# pylint: disable=too-many-branches
765794
def init_jobspec(self, args):
795+
per_resource_type = None
796+
per_resource_count = None
797+
766798
if not args.command:
767799
raise ValueError("job command and arguments are missing")
768800

769-
# If ntasks not set, then set it to either node count, with
770-
# exclusive flag enabled, or to 1 (the default).
771-
if not args.ntasks:
772-
if args.nodes:
773-
args.ntasks = args.nodes
774-
args.exclusive = True
775-
else:
776-
args.ntasks = 1
777-
778801
# Ensure integer args are converted to int() here.
779802
# This is done because we do not use type=int in argparse in order
780803
# to allow these options to be mutable for bulksubmit:
781804
#
782-
for arg in ["ntasks", "nodes", "cores_per_task", "gpus_per_task"]:
805+
for arg in [
806+
"ntasks",
807+
"nodes",
808+
"cores",
809+
"cores_per_task",
810+
"gpus_per_task",
811+
"tasks_per_node",
812+
"tasks_per_core",
813+
"gpus_per_node",
814+
]:
783815
value = getattr(args, arg)
784816
if value:
785817
try:
@@ -788,6 +820,82 @@ def init_jobspec(self, args):
788820
opt = arg.replace("_", "-")
789821
raise ValueError(f"--{opt}: invalid int value '{value}'")
790822

823+
if args.tasks_per_node is not None and args.tasks_per_core is not None:
824+
raise ValueError(
825+
"Do not specify both the number of tasks per node and per core"
826+
)
827+
828+
# Handle --tasks-per-node or --tasks-per-core (it is an error to
829+
# specify both). Check options for validity and assign the
830+
# per_resource variable when valid.
831+
#
832+
if args.tasks_per_node is not None or args.tasks_per_core is not None:
833+
if args.tasks_per_node is not None:
834+
if args.tasks_per_node < 1:
835+
raise ValueError("--tasks-per-node must be >= 1")
836+
837+
per_resource_type = "node"
838+
per_resource_count = args.tasks_per_node
839+
elif args.tasks_per_core is not None:
840+
if args.tasks_per_core < 1:
841+
raise ValueError("--tasks-per-core must be >= 1")
842+
per_resource_type = "core"
843+
per_resource_count = args.tasks_per_core
844+
845+
if args.gpus_per_node:
846+
if not args.nodes:
847+
raise ValueError("--gpus-per-node requires --nodes")
848+
849+
# If any of --tasks-per-node, --tasks-per-core, --cores, or
850+
# --gpus-per-node is used, then use the per_resource constructor:
851+
#
852+
if (
853+
per_resource_type is not None
854+
or args.gpus_per_node is not None
855+
or args.cores is not None
856+
):
857+
# If any of the per-task options was also specified, raise an
858+
# error here instead of silently ignoring those options:
859+
if (
860+
args.ntasks is not None
861+
or args.cores_per_task is not None
862+
or args.gpus_per_task
863+
):
864+
raise ValueError(
865+
"Per-resource options can't be used with per-task options."
866+
+ " (See --help for details)"
867+
)
868+
869+
# In per-resource mode, set the exclusive flag if nodes is
870+
# specified without cores. This preserves the default behavior
871+
# of requesting nodes exclusively when only -N is used:
872+
if args.nodes and args.cores is None:
873+
args.exclusive = True
874+
875+
return JobspecV1.per_resource(
876+
args.command,
877+
ncores=args.cores,
878+
nnodes=args.nodes,
879+
per_resource_type=per_resource_type,
880+
per_resource_count=per_resource_count,
881+
gpus_per_node=args.gpus_per_node,
882+
exclusive=args.exclusive,
883+
)
884+
885+
# If ntasks not set, then set it to node count, with
886+
# exclusive flag enabled
887+
if not args.ntasks and args.nodes:
888+
args.ntasks = args.nodes
889+
args.exclusive = True
890+
891+
# Otherwise, the default ntasks for from_command() is 1:
892+
if not args.ntasks:
893+
args.ntasks = 1
894+
895+
# default cores_per_task for from_command() is 1:
896+
if not args.cores_per_task:
897+
args.cores_per_task = 1
898+
791899
return JobspecV1.from_command(
792900
args.command,
793901
num_tasks=args.ntasks,

0 commit comments

Comments
 (0)