Skip to content

Commit 7f476d9

Browse files
authored
Add CLI option for system parameters and support for Matrix system (#27)
* Added a system definition for Matrix and Vector. * Fixed memory size * Added command line argument to express the desired number of GPUs per process (task). For most AI codes, this should be 1, which is the default, but it can now be set. Updated the FLUX and SLURM schedulers to set this field when necessary. This also addresses an issue when running on compute resources that can be shared and are not exclusive. * Ran black. * Added support for specifying a set of command line arguments for the system parameters. These will overwrite any known or autodetected system parameters.
1 parent 76dc8d8 commit 7f476d9

File tree

10 files changed

+125
-26
lines changed

10 files changed

+125
-26
lines changed

hpc_launcher/cli/common_args.py

Lines changed: 45 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,18 @@
2323

2424
from dataclasses import fields
2525

26+
class ParseKVAction(argparse.Action):
    """argparse action that collects repeated KEY=VALUE tokens into a dict.

    Intended for use with ``nargs='+'`` so that, e.g.,
    ``-p cores_per_node=112 gpu_arch=sm_90`` produces
    ``{'cores_per_node': '112', 'gpu_arch': 'sm_90'}`` on the destination
    attribute.  Values are kept as strings; conversion happens downstream.

    :raises argparse.ArgumentError: if a token contains no ``=`` separator.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        # Reuse a dict from an earlier occurrence of the option rather than
        # clobbering it, so "-p a=1 -p b=2" accumulates both pairs.
        kv = getattr(namespace, self.dest, None)
        if kv is None:
            kv = dict()
        setattr(namespace, self.dest, kv)
        for each in values:
            # Split on the FIRST '=' only: the original split("=") raised
            # ValueError for values that themselves contain '=' (e.g.
            # "extra=a=b"), rejecting legitimate key=value pairs.
            key, sep, value = each.partition("=")
            if not sep:
                message = "Error on '{}' || It should be 'key=value'".format(each)
                raise argparse.ArgumentError(self, message)
            kv[key] = value
37+
2638

2739
def create_scheduler_arguments(**kwargs) -> dict[str, str]:
2840
cmdline_args = {}
@@ -68,6 +80,13 @@ def setup_arguments(parser: argparse.ArgumentParser):
6880
help="Specifies the number of requested processes per node",
6981
)
7082

83+
group.add_argument(
84+
"--gpus-per-proc",
85+
type=int,
86+
default=None, # Internally, if there are GPUs, this will default to 1
87+
help="Specifies the number of requested GPUs per process (default: 1)",
88+
)
89+
7190
group.add_argument("-q", "--queue", default=None, help="Specifies the queue to use")
7291

7392
# Constraints
@@ -98,6 +117,21 @@ def setup_arguments(parser: argparse.ArgumentParser):
98117
help="Run locally (i.e., one process without a batch " "scheduler)",
99118
)
100119

120+
# System
121+
group = parser.add_argument_group(
122+
"System",
123+
"Provide system parameters from the CLI -- overrides built-in system descriptions and autodetection",
124+
)
125+
group.add_argument(
126+
"-p",
127+
"--system-params",
128+
dest="system_params",
129+
nargs='+',
130+
action=ParseKVAction,
131+
help="Specifies some or all of the parameters of a system as a dictionary (note it will override any known or autodetected parameters): -p cores_per_node=<int> gpus_per_node=<int> gpu_arch=<str> mem_per_gpu=<float> numa_domains=<int> scheduler=<str>",
132+
metavar="KEY1=VALUE1",
133+
)
134+
101135
# Schedule
102136
group = parser.add_argument_group(
103137
"Schedule", "Arguments that determine when a job will run"
@@ -206,7 +240,7 @@ def setup_arguments(parser: argparse.ArgumentParser):
206240

207241
def validate_arguments(args: argparse.Namespace):
208242
"""
209-
Validation checks for the commong arguments. Raises exceptions on failure.
243+
Validation checks for the common arguments. Raises exceptions on failure.
210244
211245
:param args: The parsed arguments.
212246
"""
@@ -259,12 +293,16 @@ def process_arguments(args: argparse.Namespace, logger: logging.Logger) -> Syste
259293
validate_arguments(args)
260294

261295
# Set system and launch configuration based on arguments
262-
system, args.nodes, args.procs_per_node = configure.configure_launch(
263-
args.queue,
264-
args.nodes,
265-
args.procs_per_node,
266-
args.gpus_at_least,
267-
args.gpumem_at_least,
296+
system, args.nodes, args.procs_per_node, args.gpus_per_proc = (
297+
configure.configure_launch(
298+
args.queue,
299+
args.nodes,
300+
args.procs_per_node,
301+
args.gpus_per_proc,
302+
args.gpus_at_least,
303+
args.gpumem_at_least,
304+
args.system_params,
305+
)
268306
)
269307

270308
return system

hpc_launcher/schedulers/flux.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,13 @@ def build_command_string_and_batch_script(
8080
if not blocking:
8181
header.write(f"# FLUX: {tmp}\n")
8282

83+
# Set the Number of GPUs per task
84+
if self.gpus_per_proc > 0:
85+
tmp = f"--gpus-per-task={self.gpus_per_proc}"
86+
cmd_args += [tmp]
87+
if not blocking:
88+
header.write(f"#FLUX: {tmp}\n")
89+
8390
if self.work_dir:
8491
tmp = [f"--setattr=system.cwd={os.path.abspath(self.work_dir)}"]
8592
self.select_interactive_or_batch(tmp, header, cmd_args, blocking)

hpc_launcher/schedulers/scheduler.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@ class Scheduler:
4242
nodes: int
4343
# Processes per node
4444
procs_per_node: int
45+
# GPUs per Process (or task) if any
46+
gpus_per_proc: int
4547
# Job name
4648
job_name: Optional[str] = None
4749
# Working directory (by default, uses current working directory)

hpc_launcher/schedulers/slurm.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,11 +93,18 @@ def build_command_string_and_batch_script(
9393
header.write(f"#SBATCH {tmp}\n")
9494

9595
# Number of Tasks per node
96-
tmp = f"--ntasks-per-node={self.nodes * self.procs_per_node}"
96+
tmp = f"--ntasks-per-node={self.procs_per_node}"
9797
cmd_args += [tmp]
9898
if not blocking:
9999
header.write(f"#SBATCH {tmp}\n")
100100

101+
# Set the Number of GPUs per task
102+
if self.gpus_per_proc > 0:
103+
tmp = f"--gpus-per-task={self.gpus_per_proc}"
104+
cmd_args += [tmp]
105+
if not blocking:
106+
header.write(f"#SBATCH {tmp}\n")
107+
101108
if self.work_dir:
102109
tmp = [f"--chdir={os.path.abspath(self.work_dir)}"]
103110
self.select_interactive_or_batch(tmp, header, cmd_args, blocking)

hpc_launcher/systems/autodetect.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,7 @@ def autodetect_current_system(quiet: bool = False) -> System:
203203
if sys in ("tioga", "tuolumne", "elcap", "rzadams", "tenaya"):
204204
return ElCapitan(sys)
205205

206-
if sys == "ipa":
206+
if sys in ("ipa", "matrix", "vector"):
207207
return CTS2(sys)
208208

209209
if sys in ("lassen", "sierra", "rzanzel"):

hpc_launcher/systems/configure.py

Lines changed: 40 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,20 +12,26 @@
1212
#
1313
# SPDX-License-Identifier: (Apache-2.0)
1414
import logging
15+
from typing import Optional
16+
from dataclasses import dataclass, fields, asdict
1517
from hpc_launcher.systems import autodetect
16-
from hpc_launcher.systems.system import System
18+
from hpc_launcher.systems.system import System, SystemParams
1719
from hpc_launcher.utils import ceildiv
1820

1921
logger = logging.getLogger(__name__)
2022

23+
def convert_to_type_of_another(variable_to_convert, reference_variable):
    """Cast *variable_to_convert* to the runtime type of *reference_variable*.

    E.g. given a string "4" and an int reference, returns the int 4.
    """
    target_type = type(reference_variable)
    return target_type(variable_to_convert)
2125

2226
def configure_launch(
2327
queue: str,
2428
nodes: int,
2529
procs_per_node: int,
30+
gpus_per_proc: int,
2631
gpus_at_least: int,
2732
gpumem_at_least: int,
28-
) -> tuple[System, int, int]:
33+
cli_system_params: Optional[tuple[int, int, str, float, int, str, Optional[float]]],
34+
) -> tuple[System, int, int, int]:
2935
"""
3036
See if the system can be autodetected and then process some special
3137
arguments that can autoselect the number of ranks / GPUs.
@@ -47,9 +53,37 @@ def configure_launch(
4753
)
4854
system_params = system.system_parameters(queue)
4955

56+
# If any system parameters were provided on the command line, potentially overriding any known or discovered system parameters
57+
if cli_system_params:
58+
if not system_params: # Use a default set of system parameters
59+
system_params = SystemParams()
60+
_cli_system_params_dict = asdict(system_params)
61+
for field in fields(system_params):
62+
if field.name in cli_system_params:
63+
_cli_system_params_dict[field.name] = convert_to_type_of_another(cli_system_params[field.name], _cli_system_params_dict[field.name])
64+
# Create a new system_params with the proper fields overwritten
65+
system_params = SystemParams(**_cli_system_params_dict)
66+
67+
if not gpus_per_proc:
68+
gpus_per_proc = 0
69+
if system_params is not None:
70+
if gpus_per_proc == 0 and system_params.gpus_per_node > 0:
71+
# If gpus_per_proc wasn't set and there are gpus on the node set it to a default of 1
72+
gpus_per_proc = 1
73+
if gpus_per_proc > system_params.gpus_per_node:
74+
logger.info(
75+
f"Requested number of GPUs per process {gpus_per_proc} exceeds the number of GPUs per node {system_params.gpus_per_node}"
76+
)
77+
gpus_per_proc = system_params.gpus_per_node
78+
79+
if procs_per_node * gpus_per_proc > system_params.gpus_per_node:
80+
logger.info(
81+
f"The combination of {procs_per_node} processes per node and {gpus_per_proc} GPUs per process exceeds the number of GPUs per node {system_params.gpus_per_node}"
82+
)
83+
5084
# If the user requested a specific number of processes per node, honor that
5185
if nodes and procs_per_node:
52-
return system, nodes, procs_per_node
86+
return system, nodes, procs_per_node, gpus_per_proc
5387

5488
# Otherwise, if there is a valid set of system parameters, try to fill in
5589
# the blanks provided by the user
@@ -69,5 +103,7 @@ def configure_launch(
69103
nodes = 1
70104
if not procs_per_node:
71105
procs_per_node = 1
106+
if not gpus_per_proc:
107+
gpus_per_proc = 1
72108

73-
return system, nodes, procs_per_node
109+
return system, nodes, procs_per_node, gpus_per_proc

hpc_launcher/systems/lc/cts2.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,16 +16,25 @@
1616
from hpc_launcher.systems.system import System, SystemParams
1717
import os
1818

19+
_h100_node = SystemParams(112, 4, "sm_90", 80.0, 8, "slurm")
1920

2021
# Known LC systems
2122
_system_params = {
2223
"ipa": (
2324
"a100",
2425
{
25-
"a100": SystemParams(32, 2, "sm_80", 40, 1, "slurm"),
26-
"aa100": SystemParams(16, 2, "sm_80", 40, 2, "slurm"),
27-
"av100": SystemParams(32, 2, "sm_70", 32, 2, "slurm"),
28-
"v100": SystemParams(16, 2, "sm_70", 32, 2, "slurm"),
26+
"a100": SystemParams(32, 2, "sm_80", 40.0, 1, "slurm"),
27+
"aa100": SystemParams(16, 2, "sm_80", 40.0, 2, "slurm"),
28+
"av100": SystemParams(32, 2, "sm_70", 32.0, 2, "slurm"),
29+
"v100": SystemParams(16, 2, "sm_70", 32.0, 2, "slurm"),
30+
},
31+
),
32+
"matrix": (
33+
"pbatch",
34+
{
35+
"pbatch": _h100_node,
36+
"pdebug": _h100_node,
37+
"erl": _h100_node,
2938
},
3039
),
3140
}

hpc_launcher/systems/lc/el_capitan_family.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,9 @@
1818

1919

2020
# Known LC systems
21-
_mi250x_node = SystemParams(64, 8, "gfx90a", 64, 4, "flux")
21+
_mi250x_node = SystemParams(64, 8, "gfx90a", 64.0, 4, "flux")
2222
# APUs can run into a snarl where they OOM if too much GPU memory is allocated
23-
_mi300a_node = SystemParams(96, 4, "gfx942", 128, 4, "flux", 0.8)
23+
_mi300a_node = SystemParams(96, 4, "gfx942", 128.0, 4, "flux", 0.8)
2424
_system_params = {
2525
"tioga": (
2626
"pdebug",

hpc_launcher/systems/lc/sierra_family.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66

77
# Supported LC systems
8-
_sierra_node = SystemParams(16, 4, "sm_70", 16, 2, "lsf")
8+
_sierra_node = SystemParams(16, 4, "sm_70", 16.0, 2, "lsf")
99
_system_params = {
1010
"lassen": (
1111
"pbatch",

hpc_launcher/systems/system.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -29,17 +29,17 @@ class SystemParams:
2929
"""Simple data structure to describe an LC system."""
3030

3131
# Number of CPU cores per compute node
32-
cores_per_node: int
32+
cores_per_node: int = 1
3333
# Number of GPUs per node
34-
gpus_per_node: int
34+
gpus_per_node: int = 0
3535
# Vendor specific GPU compiler architecture
36-
gpu_arch: str
36+
gpu_arch: str = None
3737
# Number of GB of memory per GPU
38-
mem_per_gpu: float
38+
mem_per_gpu: float = 0.0
3939
# Number of NUMA domains
40-
numa_domains: int
40+
numa_domains: int = 1
4141
# String name of the Scheduler class
42-
scheduler: str
42+
scheduler: str = None
4343
# Optional system level guard to limit GPU/APU memory utilization
4444
fraction_max_gpu_mem: Optional[float] = 1.0
4545

0 commit comments

Comments
 (0)