
Commit 076c3ba

Merge branch 'wht/feature/support_local_scheduler' of github.com:inclusionAI/AReaL into wht/feature/support_local_scheduler

2 parents: 9ae69d2 + 7237882

39 files changed: +652 -230 lines

README.md

Lines changed: 10 additions & 9 deletions

@@ -71,15 +71,16 @@ state-of-the-art 7B and 32B models for mathematical reasoning. Check out our
 
 ## 📚 Examples
 
-| Task | Description | Performance |
-| ---------------------------------------------- | ------------------------------------------------------------------------------------ | --------------------------------------------------------------------------------- |
-| **[Math](examples/math/)** | Mathematical problem solving (SFT, GRPO, or PPO) | TBA |
-| **[LoRA Math](examples/lora/)** | Math Agent Trained With LoRA | TBA |
-| **[VLM Math](examples/vlm/)** | CLEVR visual counting tasks | TBA |
-| **[Reasoning](examples/countdown/)** | Countdown numbers game with custom rewards | [Training Curve](/examples/countdown/countdown_training_curve.png) |
-| **[Search Agent](examples/search-agent/)** | An agent with end-to-end reasoning, search, browsing, and summarization capabilities | [ASearcher Repo](https://github.com/inclusionAI/ASearcher) |
-| **[Tool-Integrated Reasoning](examples/tir/)** | An agent that can invoke tools during reasoning | [TIR Example](https://github.com/inclusionAI/AReaL/tree/main/examples/tir) |
-| **[RLHF](examples/alignment/)** | RLHF for LLM Alignment | [RLHF Example](https://github.com/inclusionAI/AReaL/tree/main/examples/alignment) |
+| Task | Description | Performance |
+| ------------------------------------------------ | ------------------------------------------------------------------------------------ | --------------------------------------------------------------------------------- |
+| **[Math](examples/math/)** | Mathematical problem solving (SFT, GRPO, or PPO) | TBA |
+| **[Multi-Turn Math](examples/multi-turn-math/)** | Iterative mathematical problem solving with self-correction | [Training Curve](examples/multi-turn-math/reward_curve.png) |
+| **[LoRA Math](examples/lora/)** | Math Agent Trained With LoRA | TBA |
+| **[VLM Math](examples/vlm/)** | CLEVR visual counting tasks | TBA |
+| **[Reasoning](examples/countdown/)** | Countdown numbers game with custom rewards | [Training Curve](/examples/countdown/countdown_training_curve.png) |
+| **[Search Agent](examples/search-agent/)** | An agent with end-to-end reasoning, search, browsing, and summarization capabilities | [ASearcher Repo](https://github.com/inclusionAI/ASearcher) |
+| **[Tool-Integrated Reasoning](examples/tir/)** | An agent that can invoke tools during reasoning | [TIR Example](https://github.com/inclusionAI/AReaL/tree/main/examples/tir) |
+| **[RLHF](examples/alignment/)** | RLHF for LLM Alignment | [RLHF Example](https://github.com/inclusionAI/AReaL/tree/main/examples/alignment) |
 
 ## 🔧 Support Matrix

areal/api/cli_args.py

Lines changed: 5 additions & 2 deletions

@@ -10,6 +10,7 @@
 uvloop.install()
 from hydra import compose as hydra_compose
 from hydra import initialize as hydra_init
+from hydra.core.global_hydra import GlobalHydra
 from omegaconf import MISSING, DictConfig, OmegaConf
 
 from areal.platforms import current_platform
@@ -295,7 +296,7 @@ class TrainEngineConfig:
     lora_alpha: int = field(default=16, metadata={"help": "lora alpha"})
     target_modules: List[str] = field(
         default_factory=list,
-        metadata={"help": "lora target_modules. None defaults to 'all-linear'"},
+        metadata={"help": "lora target_modules."},
     )
     peft_type: str = field(
         default="lora",
@@ -541,7 +542,7 @@ class SGLangConfig:
     random_seed: int = 1
     skip_tokenizer_init: bool = False
     disable_cuda_graph: bool = False
-    disable_radix_cache: bool = False
+    disable_radix_cache: bool = True
     disable_cuda_graph_padding: bool = False
     enable_nccl_nvls: bool = False
     disable_outlines_disk_cache: bool = False
@@ -1148,6 +1149,8 @@ def parse_cli_args(argv: List[str]):
    assert config_file.exists(), f"Config file {config_file} does not exist."
    # hydra only recognize relative paths
    relpath = Path(os.path.relpath(str(config_file), Path(__file__).parent.absolute()))
+    if GlobalHydra.instance().is_initialized():
+        GlobalHydra.instance().clear()
    hydra_init(config_path=str(relpath.parent), job_name="app", version_base=None)
    cfg = hydra_compose(
        config_name=str(relpath.name).split(".yaml")[0],

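The GlobalHydra guard makes repeated config parsing safe within one process. A minimal sketch of the same pattern in isolation, with an illustrative config directory and name:

# Hydra keeps process-global state, so calling initialize() a second
# time in the same process raises unless the previous instance is
# cleared first. This mirrors the guard added to parse_cli_args above.
from hydra import compose, initialize
from hydra.core.global_hydra import GlobalHydra

def load_cfg(config_dir: str, config_name: str):
    if GlobalHydra.instance().is_initialized():
        GlobalHydra.instance().clear()
    # hydra only accepts config_path relative to the calling file
    initialize(config_path=config_dir, job_name="app", version_base=None)
    return compose(config_name=config_name)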
areal/engine/base_hf_engine.py

Lines changed: 1 addition & 4 deletions

@@ -117,10 +117,6 @@ def parallelism_group(self) -> dist.ProcessGroup:
 
     def create_process_group(self, parallel_strategy: ParallelStrategy | None = None):
         backend = current_platform.communication_backend
-        if current_platform.communication_backend == "nccl":
-            # Required by NCCL weight update group for SGLang
-            os.environ["NCCL_CUMEM_ENABLE"] = "0"
-            os.environ["NCCL_NVLS_ENABLE"] = "0"
         if not dist.is_initialized():
             # TODO: Handle the condition when WORLD_SIZE and RANK is not set in launcher
             # NOTE: device_id **SHOULD NOT** be passed into init_process_group,
@@ -320,6 +316,7 @@ def step_lr_scheduler(self):
 
     def prepare_mb_list(self, input_: Dict[str, Any]) -> MicroBatchList:
         assert "attention_mask" in input_ and "input_ids" in input_
+        input_ = input_.copy()
 
         if is_qwen2_vl_model(self.model_config.model_type):
             # Create the special t,h,w position IDs for qwen 2.5 VL

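The added input_.copy() keeps prepare_mb_list from mutating the caller's batch when it attaches derived fields further down the method. A minimal sketch of the idea; the position-ids computation below is illustrative, not AReaL's actual logic:

import torch

def prepare(batch: dict) -> dict:
    # Shallow copy: a new dict, but the same underlying tensors.
    batch = batch.copy()
    # Derived keys added here do not leak back into the caller's dict.
    batch["position_ids"] = torch.cumsum(batch["attention_mask"], dim=-1) - 1
    return batch

original = {
    "input_ids": torch.tensor([[1, 2, 3]]),
    "attention_mask": torch.tensor([[1, 1, 1]]),
}
prepared = prepare(original)
assert "position_ids" not in original  # the caller's batch is unchanged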
areal/engine/sglang_remote.py

Lines changed: 19 additions & 1 deletion

@@ -26,6 +26,7 @@
 from areal.platforms import current_platform
 from areal.utils import logging, name_resolve, names
 from areal.utils.http import arequest_with_retry, get_default_connector
+from areal.utils.launcher import wait_llm_server_addrs
 
 RID_CACHE_SIZE = 128
 
@@ -85,9 +86,26 @@ def initialize(
 
         if addr:
             self.addresses = addr if isinstance(addr, list) else [addr]
+            self.logger.info(f"Get server addresses from the `addr` argument.")
         else:
+            if (
+                self.config.experiment_name is not None
+                and self.config.trial_name is not None
+            ):
+                try:
+                    self.addresses = wait_llm_server_addrs(
+                        experiment_name=self.config.experiment_name,
+                        trial_name=self.config.trial_name,
+                        timeout=1,
+                    )
+                    self.logger.info(f"Get server addresses from name_resolve.")
+                except (TimeoutError, RuntimeError):
+                    # RuntimeError happens when name_resolve is not properly configured.
+                    pass
+            if not self.addresses and os.getenv("AREAL_LLM_SERVER_ADDRS"):
                 # When addr is not provided, fallback to reading addrs from env var
-                self.addresses = os.getenv("AREAL_LLM_SERVER_ADDRS").split(",")
+                self.addresses = os.environ["AREAL_LLM_SERVER_ADDRS"].split(",")
+                self.logger.info(f"Get server addresses from environment variable.")
         if not self.addresses:
             raise RuntimeError(
                 "No configured SGLang servers. Please pass in SGLang server addresses by arguments "

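Both remote engines now resolve server addresses in the same three-step order; the vllm_remote.py hunk below is identical apart from its error message. A condensed sketch of that order, with wait_llm_server_addrs abstracted behind a callable since only its call site appears in this diff:

import os

def resolve_addresses(addr, wait_from_name_resolve) -> list:
    # 1) An explicit `addr` argument always wins.
    if addr:
        return addr if isinstance(addr, list) else [addr]
    addresses = []
    # 2) Otherwise try the name_resolve registry (experiment/trial scoped).
    try:
        addresses = wait_from_name_resolve(timeout=1)
    except (TimeoutError, RuntimeError):
        # Registry empty, or name_resolve not configured at all.
        pass
    # 3) Finally fall back to the AREAL_LLM_SERVER_ADDRS environment variable.
    if not addresses and os.getenv("AREAL_LLM_SERVER_ADDRS"):
        addresses = os.environ["AREAL_LLM_SERVER_ADDRS"].split(",")
    if not addresses:
        raise RuntimeError("No configured LLM servers.")
    return addresses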
areal/engine/vllm_remote.py

Lines changed: 19 additions & 1 deletion

@@ -26,6 +26,7 @@
 from areal.platforms import current_platform
 from areal.utils import logging, name_resolve, names
 from areal.utils.http import arequest_with_retry, get_default_connector
+from areal.utils.launcher import wait_llm_server_addrs
 
 RID_CACHE_SIZE = 128
 
@@ -90,9 +91,26 @@ def initialize(
 
         if addr:
             self.addresses = addr if isinstance(addr, list) else [addr]
+            self.logger.info(f"Get server addresses from the `addr` argument.")
         else:
+            if (
+                self.config.experiment_name is not None
+                and self.config.trial_name is not None
+            ):
+                try:
+                    self.addresses = wait_llm_server_addrs(
+                        experiment_name=self.config.experiment_name,
+                        trial_name=self.config.trial_name,
+                        timeout=1,
+                    )
+                    self.logger.info(f"Get server addresses from name_resolve.")
+                except (TimeoutError, RuntimeError):
+                    # RuntimeError happens when name_resolve is not properly configured.
+                    pass
+            if not self.addresses and os.getenv("AREAL_LLM_SERVER_ADDRS"):
                 # When addr is not provided, fallback to reading addrs from env var
-                self.addresses = os.getenv("AREAL_LLM_SERVER_ADDRS").split(",")
+                self.addresses = os.environ["AREAL_LLM_SERVER_ADDRS"].split(",")
+                self.logger.info(f"Get server addresses from environment variable.")
         if not self.addresses:
             raise RuntimeError(
                 "No configured vLLM servers. Please pass in vLLM server addresses by arguments "

areal/experimental/tests/test_megatron_engine.py

Lines changed: 2 additions & 2 deletions

@@ -23,9 +23,9 @@
 logger = logging.getLogger("MegatronEngine Test")
 
 VOCAB_SIZE = 100
-MODEL_PATH = "/storage/testing/models/Qwen__Qwen3-1.7B/"
+MODEL_PATH = "/storage/openpsi/models/Qwen__Qwen3-0.6B/"
 if not os.path.exists(MODEL_PATH):
-    MODEL_PATH = "Qwen/Qwen3-1.7B"
+    MODEL_PATH = "Qwen/Qwen3-0.6B"
 
 
 @pytest.fixture(scope="module")

areal/experimental/tests/test_openai.py

Lines changed: 2 additions & 2 deletions

@@ -15,9 +15,9 @@
 
 EXPR_NAME = "test_openai"
 TRIAL_NAME = "trial_0"
-MODEL_PATH = "/storage/openpsi/models/Qwen__Qwen3-1.7B/"
+MODEL_PATH = "/storage/openpsi/models/Qwen__Qwen3-0.6B/"
 if not os.path.exists(MODEL_PATH):
-    MODEL_PATH = "Qwen/Qwen3-1.7B"
+    MODEL_PATH = "Qwen/Qwen3-0.6B"
 PORT, DIST_PORT = network.find_free_ports(2)
 HOST = network.gethostip()
 # set a large timeout since we may need to download the model from hub

areal/experimental/tests/test_sglang_local_engine.py

Lines changed: 2 additions & 2 deletions

@@ -29,9 +29,9 @@
 
 EXPR_NAME = "test_sglang_local_engine"
 TRIAL_NAME = "trial_0"
-MODEL_PATH = "/storage/testing/models/Qwen__Qwen3-1.7B/"
+MODEL_PATH = "/storage/openpsi/models/Qwen__Qwen3-0.6B/"
 if not os.path.exists(MODEL_PATH):
-    MODEL_PATH = "Qwen/Qwen2-0.5B"
+    MODEL_PATH = "Qwen/Qwen3-0.6B"
 
 
 def build_engine_config(**kwargs):

areal/experimental/tests/torchrun/run_megatron_engine_distributed.py

Lines changed: 2 additions & 2 deletions

@@ -26,11 +26,11 @@
 from areal.utils.data import broadcast_tensor_container
 
 MODEL_PATHS = {
-    "qwen3": "/storage/openpsi/models/Qwen__Qwen3-1.7B/",
+    "qwen3": "/storage/openpsi/models/Qwen__Qwen3-0.6B/",
     "qwen3moe": "/storage/openpsi/models/Qwen__Qwen3-30B-A3B/",
 }
 HF_MODEL_PATHS = {
-    "qwen3": "Qwen/Qwen3-1.7B",
+    "qwen3": "Qwen/Qwen3-0.6B",
     # TODO: switch Qwen3MoE to smaller model initialized from scratch
     "qwen3moe": "Qwen/Qwen3-30B-A3B",
 }

areal/launcher/local.py

Lines changed: 67 additions & 59 deletions

@@ -22,8 +22,14 @@
 )
 from areal.platforms import current_platform
 from areal.utils import logging, name_resolve, names
-from areal.utils.launcher import JobException, JobInfo, JobState, get_env_vars
-from areal.utils.network import find_free_ports, gethostip
+from areal.utils.launcher import (
+    JobException,
+    JobInfo,
+    JobState,
+    get_env_vars,
+    wait_llm_server_addrs,
+)
+from areal.utils.network import find_free_ports
 from areal.utils.recover import check_if_recover
 
 logger = logging.getLogger("Local Scheduler")
@@ -136,7 +142,9 @@ def submit_array(
             )
             c = f"{c} 2>&1 | tee -a {self.log_path_of(job_name)}"
             logger.info("Starting local process with command: %s", c)
-            process = subprocess.Popen(c, shell=isinstance(c, str))
+            process = subprocess.Popen(
+                c, shell=isinstance(c, str), stdout=sys.stdout, stderr=sys.stdout
+            )
             self._jobs[f"{job_name}/{offset + i}"] = process
             self._job_counter[job_name] += 1

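The Popen change streams each job's output to the launcher's console in addition to the per-job log file. A self-contained sketch of the same pipeline, with an illustrative command and log path:

import subprocess
import sys

# The shell pipeline tees output into the log file; forwarding
# stdout/stderr to sys.stdout lets local runs also stream it live.
cmd = "echo hello 2>&1 | tee -a /tmp/job.log"  # illustrative command
proc = subprocess.Popen(cmd, shell=True, stdout=sys.stdout, stderr=sys.stdout)
proc.wait()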
@@ -275,72 +283,65 @@ def local_main(config, run_id: int = 0):
         f"run_id={run_id}, is_recover_run={is_recover_run}"
     )
 
-    server_cmd = []
     server_addrs = []
-    if alloc_mode.gen_backend == "sglang":
-        base_seed = config.sglang.random_seed
-        config.sglang = to_structured_cfg(config.sglang, SGLangConfig)
-        ports = find_free_ports(alloc_mode.gen.dp_size * 2, port_range=(10000, 50000))
-        host_ip = gethostip()
-        host = "localhost" if not config.sglang.enable_metrics else host_ip
-        for i in range(alloc_mode.gen.dp_size):
-            config.sglang.random_seed = base_seed + i
-            cmd = SGLangConfig.build_cmd(
-                config.sglang,
-                host=host,
-                tp_size=alloc_mode.gen.tp_size,
-                base_gpu_id=0,
-                port=ports[i * 2],
-                dist_init_addr=f"localhost:{ports[i*2+1]}",
-            )
-            server_cmd.append(cmd)
-            server_addrs.append(f"{host}:{ports[i * 2]}")
-
-        # Launch inference servers.
-        launcher.submit_array(
-            job_name="llm_server",
-            cmd=server_cmd,
-            count=alloc_mode.gen.dp_size,
-            gpu=alloc_mode.gen.pp_size * alloc_mode.gen.tp_size,
-            env_vars=get_env_vars(
-                config.cluster.cluster_name,
-                config.launcher.inference_server_env_vars,
-            ),
-        )
-        logger.info(
-            f"LLM inference server launched at: AREAL_LLM_SERVER_ADDRS={','.join(server_addrs)}"
+    if alloc_mode.gen_backend in ("sglang", "vllm"):
+        # Launcher should launch llm servers according to allocation mode.
+        if alloc_mode.gen_backend == "sglang":
+            config.sglang = to_structured_cfg(config.sglang, SGLangConfig)
+            random_seed = config.sglang.random_seed
+        else:
+            config.vllm = to_structured_cfg(config.vllm, vLLMConfig)
+            random_seed = config.vllm.seed
+
+        backend_spec = {
+            "sglang": {
+                "module": "areal.launcher.sglang_server",
+                "seed_arg": "sglang.random_seed",
+                "set_device_env": False,
+            },
+            "vllm": {
+                "module": "areal.launcher.vllm_server",
+                "seed_arg": "vllm.seed",
+                "set_device_env": True,  # vLLM needs `device_control_env_var` to control GPU allocation
+            },
+        }
+
+        spec = backend_spec[alloc_mode.gen_backend]
+
+        base_seed = random_seed
+        seed_arg = spec["seed_arg"]
+        module = spec["module"]
+        server_cmd = (
+            f"python3 -m {module} {' '.join(sys.argv[1:])} {seed_arg}={base_seed}"
         )
-    elif alloc_mode.gen_backend == "vllm":
-        base_seed = config.vllm.seed
-        config.vllm = to_structured_cfg(config.vllm, vLLMConfig)
-        ports = find_free_ports(alloc_mode.gen.dp_size * 2, port_range=(10000, 50000))
-        host = "localhost"
-        for i in range(alloc_mode.gen.dp_size):
-            config.vllm.seed = base_seed + i
-            cmd = vLLMConfig.build_cmd(
-                config.vllm,
-                host=host,
-                tp_size=alloc_mode.gen.tp_size,
-                port=ports[i * 2],
-                dist_init_addr=f"localhost:{ports[i*2+1]}",
-            )
-            server_cmd.append(cmd)
-            server_addrs.append(f"{host}:{ports[i * 2]}")
 
         # Launch inference servers.
         launcher.submit_array(
             job_name="llm_server",
             cmd=server_cmd,
-            count=alloc_mode.gen.dp_size,
-            gpu=alloc_mode.gen.pp_size * alloc_mode.gen.tp_size,
+            count=1,
+            gpu=alloc_mode.gen.pp_size
+            * alloc_mode.gen.tp_size
+            * alloc_mode.gen.dp_size,
             env_vars=get_env_vars(
                 config.cluster.cluster_name,
                 config.launcher.inference_server_env_vars,
             ),
         )
-        logger.info(
-            f"LLM inference server launched at: AREAL_LLM_SERVER_ADDRS={','.join(server_addrs)}"
-        )
+
+        # Get llm server addresses by name resolve
+        try:
+            server_addrs = wait_llm_server_addrs(
+                config.experiment_name,
+                config.trial_name,
+                n_rollout_servers=alloc_mode.gen.dp_size,
+            )
+            logger.info(
+                f"LLM inference server launched at: AREAL_LLM_SERVER_ADDRS={','.join(server_addrs)}"
+            )
+        except (TimeoutError, KeyboardInterrupt) as e:
+            launcher.stop_all(signal="SIGINT")
+            raise e
 
     # Launch trainer entrypoint
     if alloc_mode.type_ != AllocationType.LLM_SERVER_ONLY:
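In the hunk above, the launcher no longer computes host:port pairs up front; it delegates port selection to the server module and blocks until all dp_size servers have registered. The real wait_llm_server_addrs lives in areal.utils.launcher and is not shown in this diff; a minimal self-contained sketch of the wait-until-n-registered pattern it implements:

import time

# Toy registry standing in for name_resolve; in AReaL the spawned server
# module is expected to publish its host:port under an experiment/trial key.
def wait_for_addrs(registry: dict, key: str, n: int, timeout: float = 300.0):
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        addrs = registry.get(key, [])
        if len(addrs) >= n:  # all dp_size servers have come up
            return addrs[:n]
        time.sleep(1.0)
    raise TimeoutError(f"only {len(registry.get(key, []))}/{n} servers registered")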
@@ -349,6 +350,14 @@ def local_main(config, run_id: int = 0):
             nprocs = 1
         else:
             gpu = nprocs = alloc_mode.train.world_size
+        _env_vars = dict(
+            AREAL_LLM_SERVER_ADDRS=",".join(server_addrs),
+            AREAL_RECOVER_RUN=str(int(is_recover_run)),
+        )
+        if alloc_mode.gen_backend == "sglang":
+            # Required by NCCL weight update group.
+            _env_vars["NCCL_CUMEM_ENABLE"] = "0"
+            _env_vars["NCCL_NVLS_ENABLE"] = "0"
         launcher.submit(
             job_name="trainer",
             cmd=f"torchrun --nnodes 1 --nproc-per-node {nprocs} --master-addr localhost --master-port {find_free_ports(1, (10000, 50000))[0]} {' '.join(sys.argv[1:])}",
@@ -358,8 +367,7 @@ def local_main(config, run_id: int = 0):
                    config.cluster.cluster_name,
                    config.launcher.trainer_env_vars,
                ),
-                AREAL_LLM_SERVER_ADDRS=",".join(server_addrs),
-                AREAL_RECOVER_RUN=str(int(is_recover_run)),
+                **_env_vars,
             ),
         )

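The NCCL flags that create_process_group used to set at runtime are now injected into the trainer's environment before the process starts, which is when NCCL actually reads them. A small sketch of the resulting composition, grounded in the hunk above:

# Setting the NCCL flags in the child's environment (rather than via
# os.environ inside create_process_group) guarantees they are visible
# before NCCL initializes in the trainer process.
def trainer_env(server_addrs, is_recover_run, gen_backend):
    env = {
        "AREAL_LLM_SERVER_ADDRS": ",".join(server_addrs),
        "AREAL_RECOVER_RUN": str(int(is_recover_run)),
    }
    if gen_backend == "sglang":
        # Required by the NCCL weight update group for SGLang.
        env["NCCL_CUMEM_ENABLE"] = "0"
        env["NCCL_NVLS_ENABLE"] = "0"
    return env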