Skip to content
Draft
Show file tree
Hide file tree
Changes from 66 commits
Commits
Show all changes
71 commits
Select commit Hold shift + click to select a range
edf7b5d
.
garrett4wade Oct 22, 2025
337e71a
.
garrett4wade Oct 22, 2025
5ab09a2
merge main
garrett4wade Oct 23, 2025
427f3b0
Merge branch 'main' of https://github.com/inclusionAI/AReaL into fw/l…
garrett4wade Oct 24, 2025
a9dad5a
minor fix import
garrett4wade Oct 24, 2025
33a626b
Merge branch 'main' of https://github.com/inclusionAI/AReaL into fw/l…
garrett4wade Oct 27, 2025
fa0bfd0
Merge branch 'main' of https://github.com/inclusionAI/AReaL into fw/l…
garrett4wade Oct 27, 2025
f660e5b
merge inference engine tests
garrett4wade Oct 27, 2025
78b489d
update
garrett4wade Oct 27, 2025
b0ecf14
Merge branch 'fw/local-inf-engine' of https://github.com/inclusionAI/…
garrett4wade Oct 27, 2025
722afad
fix
garrett4wade Oct 28, 2025
17945e9
merge main
garrett4wade Oct 28, 2025
7a2f6a9
.
garrett4wade Oct 28, 2025
46ee150
add local scheduler
garrett4wade Oct 28, 2025
b1eefc1
merge main
garrett4wade Oct 28, 2025
e471c1e
Merge branch 'fw/ls' of https://github.com/inclusionAI/AReaL into fw/…
garrett4wade Oct 28, 2025
266d6d6
implement run workflow endpoint and rollout controller
garrett4wade Oct 28, 2025
f67dd60
add tensor serialization
garrett4wade Oct 29, 2025
a58c984
fix test
garrett4wade Oct 29, 2025
d14b53c
add scheduler and rollout controller test
garrett4wade Oct 29, 2025
b3a3e53
fix docstring and type annotations
garrett4wade Oct 29, 2025
f223db1
merge train controller commit
garrett4wade Oct 29, 2025
2969c9f
Merge branch 'main' of https://github.com/inclusionAI/AReaL into fw/l…
garrett4wade Oct 29, 2025
a58d0cc
add train controller
garrett4wade Oct 29, 2025
e049f30
init commit train controller
garrett4wade Oct 29, 2025
b4c4eb6
refactor train controller
garrett4wade Oct 29, 2025
5a702a1
add train controller tests
garrett4wade Oct 29, 2025
54ee6fd
renaming
garrett4wade Oct 29, 2025
b21e452
.
garrett4wade Oct 29, 2025
170cc75
update train script
garrett4wade Oct 29, 2025
157b0b0
implement rollout stats
garrett4wade Oct 29, 2025
7475004
.
garrett4wade Oct 29, 2025
6e54a58
fix
garrett4wade Oct 29, 2025
deee027
add sync rpc server
garrett4wade Oct 29, 2025
ece5152
refactor to http server instead of flask
garrett4wade Oct 29, 2025
69805e8
sft run
garrett4wade Oct 30, 2025
c37732c
fix sft; init grpo
garrett4wade Oct 30, 2025
e50b9b0
add rpc server configuration
garrett4wade Oct 30, 2025
a8e75de
except update weight
garrett4wade Oct 30, 2025
beeedd7
grpo run
garrett4wade Oct 30, 2025
9c96a3e
merge main
garrett4wade Oct 31, 2025
ce23d47
update to flask rpc server
garrett4wade Oct 31, 2025
bb60c35
add grpo example
garrett4wade Oct 31, 2025
ae1d6a2
remove local inference engine
garrett4wade Oct 31, 2025
a25d378
minor revert
garrett4wade Oct 31, 2025
99fe517
revert realhf
garrett4wade Oct 31, 2025
2830fac
Merge branch 'fw/local-inf-engine' of https://github.com/inclusionAI/…
garrett4wade Nov 2, 2025
7e133b8
minor config fix
garrett4wade Nov 2, 2025
73912a8
merge tests
garrett4wade Oct 31, 2025
a822cb2
fix docstring
garrett4wade Oct 31, 2025
6e62884
add test
garrett4wade Nov 1, 2025
98d2c8d
fix format
garrett4wade Oct 31, 2025
12cc12e
shorter ctx len for test
garrett4wade Nov 2, 2025
3ba98e6
add adv norm in grpo test
garrett4wade Nov 2, 2025
204b1fd
update test to use local path
garrett4wade Nov 3, 2025
95a08ac
resource cleanup in tests
garrett4wade Nov 3, 2025
d0dfad7
fix vllm pp
garrett4wade Nov 3, 2025
6749916
fix
garrett4wade Nov 3, 2025
52921f2
.
garrett4wade Nov 4, 2025
9258e2e
.
garrett4wade Nov 4, 2025
b640c11
Merge branch 'fw/msvt' of https://github.com/inclusionAI/AReaL into f…
garrett4wade Nov 4, 2025
4443c9b
.
garrett4wade Nov 4, 2025
c664acc
merge main
garrett4wade Nov 4, 2025
ac6a11a
add assertion
garrett4wade Nov 4, 2025
e85a39f
merge main
garrett4wade Nov 4, 2025
d212a38
merge
garrett4wade Nov 4, 2025
54fce97
revert and fix
garrett4wade Nov 4, 2025
eeffd1d
merge main
garrett4wade Nov 10, 2025
749e047
minor revert
garrett4wade Nov 10, 2025
41c2407
merge main
garrett4wade Nov 14, 2025
3b25e2f
Merge branch 'main' of https://github.com/inclusionAI/AReaL into fw/l…
garrett4wade Nov 14, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
137 changes: 102 additions & 35 deletions areal/api/cli_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import os
from dataclasses import asdict, dataclass, field
from pathlib import Path
from typing import Any

import uvloop
import yaml
Expand Down Expand Up @@ -311,6 +312,52 @@ class MegatronEngineConfig:
recompute_modules: list[str] | None = None


@dataclass
class SchedulingStrategy:
    """Placement policy for a role's workers relative to other roles.

    ``colocation`` places this role together with the workers of ``target``
    (per the field help below); ``separation`` presumably gives the role its
    own dedicated resources — confirm against the scheduler implementation.
    """

    # Placement mode; restricted to the listed choices by CLI parsing metadata.
    type: str = field(
        default="separation", metadata={"choices": ["separation", "colocation"]}
    )
    # Only meaningful when type == "colocation": the role to share resources with.
    target: str | None = field(
        default=None, metadata={"help": "The target role to be colocated with"}
    )


@dataclass
class SchedulingSpec:
    """Per-task resource and launch specification consumed by the scheduler.

    Combines generic container requirements (CPU/GPU/memory/ports/image),
    the command to launch, and a set of pass-through Slurm ``sbatch``
    options for Slurm-backed deployments.
    """

    cpu: int = field(default=0, metadata={"help": "Number of CPU cores required"})
    gpu: int = field(default=0, metadata={"help": "Number of GPU units required"})
    mem: int = field(default=0, metadata={"help": "Amount of memory (GB) required"})
    port_count: int = field(default=2, metadata={"help": "Number of ports to expose"})
    image: str = field(
        default="", metadata={"help": "Docker/Singularity container image to use"}
    )
    # Coarse task category; restricted to the listed choices by CLI metadata.
    type: str = field(
        default="worker",
        metadata={
            "help": "Task type (e.g., worker, engine)",
            "choices": ["worker", "engine"],
        },
    )
    env_vars: dict[str, str] = field(
        default_factory=dict,
        metadata={"help": "Environment variables for the container"},
    )
    # Launch command; None means the caller substitutes AReaL's RPC server
    # (see the default_factory used by the engine configs in this file).
    cmd: str | None = field(
        default=None,
        metadata={
            "help": "Command to execute inside the container. Defaults to AReaL's RPC server."
        },
    )
    # Slurm configurations from "https://slurm.schedmd.com/sbatch.html";
    # each maps directly to the sbatch option of the same/similar name.
    nodelist: str | None = None
    exclude: str | None = None
    partition: str | None = None
    time_limit: str | None = None  # see "--time" option for format
    begin: str | None = None  # see "--begin" option for format
    deadline: str | None = None  # see "--deadline" option for format


@dataclass
class TrainEngineConfig:
"""Core configuration for model training, including optimization and backend settings."""
Expand Down Expand Up @@ -384,6 +431,13 @@ class TrainEngineConfig:
default="lora",
metadata={"help": "peft method type. Only LoRA is supported for now."},
)
scheduling_spec: SchedulingSpec = field(
default_factory=lambda: SchedulingSpec(
cmd="python -m areal.scheduler.rpc.rpc_server"
),
metadata={"help": "train engine schedule specs"},
)
scheduling_strategy: SchedulingStrategy = field(default_factory=SchedulingStrategy)


@dataclass
Expand Down Expand Up @@ -538,6 +592,24 @@ class PPOCriticConfig(TrainEngineConfig):
)


def get_py_cmd(module: str, args: dict[str, Any]) -> list[str]:
    """Render an argv list ``["python3", "-m", module, ...]`` from option values.

    Each key becomes a ``--key`` flag with underscores replaced by dashes.
    Values of ``None``, ``False``, ``""``, and empty lists are omitted
    entirely; ``True`` emits a bare flag; a list emits the flag followed by
    each element stringified; anything else emits the flag followed by
    ``str(value)``.

    Args:
        module: Dotted module path to run via ``python -m``.
        args: Mapping of option names to values.

    Returns:
        The command as a list of argv tokens.
    """
    argv = ["python3", "-m", module]
    for name, value in args.items():
        # Drop options that carry no information.
        if value is None or value is False or value == "":
            continue
        if isinstance(value, list) and not value:
            continue
        flag = "--" + name.replace("_", "-")
        if value is True:
            # Boolean switch: presence alone enables it.
            argv.append(flag)
        elif isinstance(value, list):
            argv.append(flag)
            argv += [str(item) for item in value]
        else:
            argv += [flag, str(value)]
    return argv


@dataclass
class vLLMConfig:
"""Configuration for vLLM runtime. Refer to:
Expand Down Expand Up @@ -598,6 +670,10 @@ def build_args(
)
return args

@staticmethod
def build_cmd_from_args(args: dict[str, Any]):
return get_py_cmd("areal.thirdparty.vllm.areal_vllm_server", args)

@staticmethod
def build_cmd(
vllm_config: "vLLMConfig",
Expand All @@ -615,18 +691,7 @@ def build_cmd(
port=port,
dist_init_addr=dist_init_addr,
)
# convert to flags
flags = []
for k, v in args.items():
if v is None or v is False or v == "":
continue
if v is True:
flags.append(f"--{k.replace('_', '-')}")
elif isinstance(v, list):
flags.append(f"--{k.replace('_', '-')} {' '.join(map(str, v))}")
else:
flags.append(f"--{k.replace('_', '-')} {v}")
return f"python3 -m areal.thirdparty.vllm.areal_vllm_server {' '.join(flags)}"
return vLLMConfig.build_cmd_from_args(args)


@dataclass
Expand Down Expand Up @@ -724,28 +789,19 @@ def build_cmd(
node_rank=node_rank,
)

# convert to flags
flags = []
for k, v in args.items():
if is_version_less("sglang", "0.4.10.post2") and "max_loaded_loras" in k:
continue
if v is None or v is False or v == "":
continue
if v is True:
flags.append(f"--{k.replace('_', '-')}")
elif isinstance(v, list):
flags.append(f"--{k.replace('_', '-')} {' '.join(map(str, v))}")
else:
flags.append(f"--{k.replace('_', '-')} {v}")
return f"python3 -m sglang.launch_server {' '.join(flags)}"
return SGLangConfig.build_cmd_from_args(args)

@staticmethod
def build_cmd_from_args(args: dict[str, Any]):
return get_py_cmd("sglang.launch_server", args)

@staticmethod
def build_args(
sglang_config: "SGLangConfig",
tp_size,
base_gpu_id,
host,
port,
tp_size: int,
base_gpu_id: int,
host: str | None = None,
port: str | None = None,
dist_init_addr: str | None = None,
n_nodes: int = 1,
node_rank: int = 0,
Expand All @@ -761,19 +817,17 @@ def build_args(
enable_multithread_load=sglang_config.enable_multithread_load,
enable_fast_load=sglang_config.enable_fast_load,
)
args.pop("enable_multithread_load", None)
args.pop("enable_fast_load", None)
args["model_loader_extra_config"] = json.dumps(
model_loader_extra_config, separators=(",", ":")
)
args.pop("enable_multithread_load", None)
args.pop("enable_fast_load", None)
# Map "all-linear" to "all"
if "lora_target_modules" in args and args["lora_target_modules"]:
args["lora_target_modules"] = [
x.replace("-linear", "") for x in args["lora_target_modules"]
]
args = dict(
host=host,
port=port,
# Model and tokenizer
tokenizer_path=sglang_config.model_path,
tokenizer_mode="auto",
Expand All @@ -791,8 +845,14 @@ def build_args(
dist_init_addr=dist_init_addr,
**args,
)
if host is not None:
args["host"] = host
if port is not None:
args["port"] = port
if not pkg_version.is_version_greater_or_equal("sglang", "0.4.9.post2"):
raise RuntimeError("Needs sglang>=0.4.9.post2 to run the code.")
if is_version_less("sglang", "0.4.10.post2"):
args.pop("max_loaded_loras", None)
return args


Expand All @@ -811,7 +871,7 @@ class InferenceEngineConfig:
)
queue_size: None | int = field(
default=None,
metadata={"help": "Input/Output queue size for async rollout."},
metadata={"help": "(Deprecated) Input/Output queue size for async rollout."},
)
consumer_batch_size: int = field(
default=1,
Expand Down Expand Up @@ -859,6 +919,13 @@ class InferenceEngineConfig:
"help": "The grace period after calling /pause_generation. Wait until all requests have been dropped."
},
)
scheduling_spec: SchedulingSpec = field(
default_factory=lambda: SchedulingSpec(
cmd="python -m areal.scheduler.rpc.rpc_server"
),
metadata={"help": "inference engine schedule specs"},
)
scheduling_strategy: SchedulingStrategy = field(default_factory=SchedulingStrategy)


@dataclass
Expand Down
Loading
Loading