diff --git a/.github/workflows/docker/docker-compose.yaml b/.github/workflows/docker/docker-compose.yaml
index 3ac2071f79..b6756f659a 100644
--- a/.github/workflows/docker/docker-compose.yaml
+++ b/.github/workflows/docker/docker-compose.yaml
@@ -1,6 +1,6 @@
 services:
   trinity-node-1:
-    image: trinity-rft-unittest:20250924
+    image: trinity-rft-unittest:20251030
     pull_policy: never
     command: sh -c "pip install -e .[dev] && ray start --head --dashboard-host 0.0.0.0 --include-dashboard true --block"
     environment:
@@ -29,7 +29,7 @@ services:
           capabilities: [gpu]

   trinity-node-2:
-    image: trinity-rft-unittest:20250924
+    image: trinity-rft-unittest:20251030
     pull_policy: never
     command: sh -c "pip install -e .[dev] && ray start --address=trinity-node-1:6379 --block"
     environment:
diff --git a/.github/workflows/unittest.yaml b/.github/workflows/unittest.yaml
index 435db3d362..5f1db13eae 100644
--- a/.github/workflows/unittest.yaml
+++ b/.github/workflows/unittest.yaml
@@ -97,6 +97,15 @@ jobs:
            fi
          fi

+      - name: Convert report.json time to ms
+        working-directory: trinity-${{ github.run_id }}
+        if: env.tests_run == 'true' || failure()
+        run: |
+          REPORT=report.json
+          if [ -f "$REPORT" ]; then
+            jq '(.results.tests[] | .duration, .start, .stop) |= (. * 1000) | (.results.summary.start, .results.summary.stop) |= (. * 1000)' "$REPORT" > "$REPORT.tmp" && mv "$REPORT.tmp" "$REPORT"
+          fi
+
       - name: Clean checkpoint dir
         working-directory: trinity-${{ github.run_id }}/.github/workflows/docker
         if: always()
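Note: the jq filter in the new "Convert report.json time to ms" step rewrites every timing field in place, multiplying seconds by 1000. Assuming a report.json shaped like the paths the filter touches (the field names come from the filter itself; the values below are hypothetical), the conversion behaves like:

    before: {"results": {"summary": {"start": 1730265600.25, "stop": 1730265642.75},
                         "tests": [{"duration": 0.42, "start": 1730265601.5, "stop": 1730265601.92}]}}
    after:  {"results": {"summary": {"start": 1730265600250, "stop": 1730265642750},
                         "tests": [{"duration": 420, "start": 1730265601500, "stop": 1730265601920}]}}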
diff --git a/benchmark/config/countdown-template.yaml b/benchmark/config/countdown-template.yaml
index 2bd2e75259..5f1c9801c5 100644
--- a/benchmark/config/countdown-template.yaml
+++ b/benchmark/config/countdown-template.yaml
@@ -54,7 +54,6 @@ explorer:
   rollout_model:
     engine_num: 2
     tensor_parallel_size: 1
-    use_v1: true
     enforce_eager: true
     enable_prefix_caching: false
     enable_chunked_prefill: false
diff --git a/benchmark/config/gsm8k-template.yaml b/benchmark/config/gsm8k-template.yaml
index 59b96250a1..9e602bfe52 100644
--- a/benchmark/config/gsm8k-template.yaml
+++ b/benchmark/config/gsm8k-template.yaml
@@ -59,7 +59,6 @@ explorer:
   rollout_model:
     engine_num: 2
     tensor_parallel_size: 1
-    use_v1: true
     enforce_eager: false
     enable_prefix_caching: false
     enable_chunked_prefill: false
diff --git a/docs/sphinx_doc/source/conf.py b/docs/sphinx_doc/source/conf.py
index 10e7e42e78..cffc44d498 100644
--- a/docs/sphinx_doc/source/conf.py
+++ b/docs/sphinx_doc/source/conf.py
@@ -5,6 +5,8 @@
 # -- Project information -----------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information

+import subprocess
+
 from trinity import __version__ as version

 project = "Trinity-RFT"
@@ -58,11 +60,22 @@
 apidoc_excluded_paths = ["tests", "build"]
 apidoc_separate_modules = True

+
 # Multiversion configs
-smv_tag_whitelist = r"^v\d+\.\d+\.\d+$"  # match v1.0.0 pattern
+def get_recent_tags(n: int) -> list:
+    """Retrieve the most recent n git tags."""
+    try:
+        tags = subprocess.check_output(
+            ["git", "tag", "--sort=-creatordate"], universal_newlines=True
+        ).splitlines()
+        return tags[:n]
+    except subprocess.CalledProcessError:
+        return []
+
+
+smv_tag_whitelist = r"^(" + "|".join(get_recent_tags(4)) + r")$"
 smv_branch_whitelist = r"^(main)$"  # included branches
 smv_remote_whitelist = None
-smv_released_pattern = r"^tags/.*$"
 smv_prefer_remote_refs = False
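Note: with the conf.py change above, the Sphinx multiversion whitelist is built from the repository's four most recent git tags instead of a fixed version pattern. A minimal sketch of the resulting regex, with hypothetical tag names standing in for the output of get_recent_tags(4):

    tags = ["v0.3.1", "v0.3.0", "v0.2.1", "v0.2.0"]  # hypothetical, newest first by creatordate
    smv_tag_whitelist = r"^(" + "|".join(tags) + r")$"
    # -> "^(v0.3.1|v0.3.0|v0.2.1|v0.2.0)$"

If no tags are found (get_recent_tags returns []), the whitelist degenerates to "^()$", which matches only the empty string, so no tagged versions get built.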
diff --git a/pyproject.toml b/pyproject.toml
index 24cef021de..6353e3e485 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -23,7 +23,7 @@ requires-python = ">=3.10,<3.13"
 dependencies = [
     "verl==0.5.0",
     "ray[default]>=2.48.0",
-    "vllm>=0.9.1,<=0.10.2",
+    "vllm>=0.9.1,<=0.11.0",
     "tensordict",
     "wandb",
     "omegaconf",
@@ -73,7 +73,7 @@ dev = [
 ]
 megatron = [
     "megatron-core[mlm]==0.13.1",
-    "transformer_engine[pytorch]==2.6.0.post1",
+    "transformer_engine[pytorch]==2.8.0",
     "mbridge>=0.13.0",
 ]
diff --git a/scripts/docker/Dockerfile b/scripts/docker/Dockerfile
index dfb6854240..60fa9ab52a 100644
--- a/scripts/docker/Dockerfile
+++ b/scripts/docker/Dockerfile
@@ -5,7 +5,7 @@
 #   docker run -it --gpus all --shm-size="64g" --rm -v $PWD:/workspace -v :/data trinity-rft:latest

-FROM nvcr.io/nvidia/cuda:12.6.3-cudnn-devel-ubuntu22.04
+FROM nvcr.io/nvidia/cuda:12.8.1-cudnn-devel-ubuntu22.04

 WORKDIR /workspace

@@ -20,13 +20,13 @@ RUN apt update && apt install -y \

 # For Aliyun users: update pip mirror to aliyun to speed up pip install
-RUN pip config set global.index-url http://mirrors.cloud.aliyuncs.com/pypi/simple/ \
-    && pip config set install.trusted-host mirrors.cloud.aliyuncs.com
+# RUN pip config set global.index-url http://mirrors.cloud.aliyuncs.com/pypi/simple/ \
+#     && pip config set install.trusted-host mirrors.cloud.aliyuncs.com

 # copy the Trinity-RFT dir into the workspace
 COPY . .

-RUN pip install --upgrade pip && pip install -e .[dev] && pip install flash-attn
+RUN pip install --upgrade pip && pip install -e .[dev] && pip install flash_attn==2.8.1 --no-build-isolation

 # Set Env variables
diff --git a/scripts/docker_for_megatron/Dockerfile b/scripts/docker_for_megatron/Dockerfile
index ef0c5e2137..d9294492b7 100644
--- a/scripts/docker_for_megatron/Dockerfile
+++ b/scripts/docker_for_megatron/Dockerfile
@@ -5,13 +5,10 @@
 #   docker run -it --gpus all --shm-size="64g" --rm -v $PWD:/workspace -v :/data trinity-rft-megatron:latest

-FROM nvcr.io/nvidia/cuda:12.6.3-cudnn-devel-ubuntu22.04
+FROM nvcr.io/nvidia/cuda:12.8.1-cudnn-devel-ubuntu22.04

 WORKDIR /workspace

-# copy the Trinity-RFT dir into the workspace
-COPY . .
-
 RUN apt update && apt install -y \
     build-essential \
     curl git wget vim tmux net-tools \
@@ -22,17 +19,21 @@ RUN apt update && apt install -y \
     && ln -sf /usr/bin/pip3 /usr/bin/pip

 # For Aliyun users: update pip mirror to aliyun to speed up pip install
-RUN pip config set global.index-url http://mirrors.cloud.aliyuncs.com/pypi/simple/ \
-    && pip config set install.trusted-host mirrors.cloud.aliyuncs.com
+# RUN pip config set global.index-url http://mirrors.cloud.aliyuncs.com/pypi/simple/ \
+#     && pip config set install.trusted-host mirrors.cloud.aliyuncs.com
+
+# copy the Trinity-RFT dir into the workspace
+COPY . .

 # Install Trinity-RFT with Megatron
 RUN pip install --upgrade pip \
+    && pip install -e .[dev] \
+    && pip install flash_attn==2.8.1 --no-build-isolation \
     && pip install -e .[megatron] \
-    && pip install flash-attn==2.8.1 \
     && pip install -v --disable-pip-version-check --no-cache-dir --no-build-isolation \
     --config-settings "--build-option=--cpp_ext" \
     --config-settings "--build-option=--cuda_ext" \
-    --resume-retries 999 git+https://github.com/NVIDIA/apex.git
+    --resume-retries 20 git+https://github.com/NVIDIA/apex.git

 # Set Env variables
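Note: the reordering in the Megatron Dockerfile above likely matters because flash_attn is installed with --no-build-isolation, which compiles against whatever torch is already present in the environment; installing .[dev] first is what provides that torch. The effective layering, paraphrased (not the verbatim RUN line):

    pip install -e .[dev]                               # pulls in torch, needed at flash_attn build time
    pip install flash_attn==2.8.1 --no-build-isolation  # compiles against the torch installed above
    pip install -e .[megatron]                          # megatron-core, transformer_engine 2.8.0, mbridge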
diff --git a/tests/common/vllm_test.py b/tests/common/vllm_test.py
index 709abba170..024da69117 100644
--- a/tests/common/vllm_test.py
+++ b/tests/common/vllm_test.py
@@ -276,7 +276,6 @@ def setUp(self):
         self.config.explorer.rollout_model.engine_type = "vllm"
         self.config.explorer.rollout_model.engine_num = 1
         self.config.explorer.rollout_model.tensor_parallel_size = 1
-        self.config.explorer.rollout_model.use_v1 = True
         self.config.explorer.rollout_model.chat_template = CHAT_TEMPLATE
         self.config.explorer.rollout_model.enable_openai_api = True

@@ -368,7 +367,6 @@ def setUp(self):
         self.config.explorer.rollout_model.engine_type = "vllm"
         self.config.explorer.rollout_model.engine_num = 1
         self.config.explorer.rollout_model.tensor_parallel_size = 1
-        self.config.explorer.rollout_model.use_v1 = True
         self.config.explorer.rollout_model.chat_template = CHAT_TEMPLATE
         self.config.explorer.rollout_model.enable_openai_api = True

@@ -578,7 +576,6 @@ def setUp(self):
         self.config.explorer.rollout_model.engine_type = "vllm"
         self.config.explorer.rollout_model.engine_num = 1
         self.config.explorer.rollout_model.tensor_parallel_size = 1
-        self.config.explorer.rollout_model.use_v1 = True
         self.config.explorer.rollout_model.chat_template = CHAT_TEMPLATE
         self.config.explorer.rollout_model.enable_openai_api = True  # added for toolcalls
diff --git a/tests/template/config.yaml b/tests/template/config.yaml
index 13b2ad081f..0f085a25ea 100644
--- a/tests/template/config.yaml
+++ b/tests/template/config.yaml
@@ -42,7 +42,6 @@ explorer:
     enforce_eager: true
     dtype: bfloat16
     seed: 42
-    use_v1: true
 trainer:
   trainer_type: verl
   save_interval: 100
diff --git a/trinity/common/models/api/vllm_patch.py b/trinity/common/models/api/vllm_patch.py
index fdbb2088a3..035500591c 100644
--- a/trinity/common/models/api/vllm_patch.py
+++ b/trinity/common/models/api/vllm_patch.py
@@ -347,10 +347,10 @@ async def run_api_server_in_ray_actor(
     reasoning_parser: Optional[str] = None,
 ):
     vllm_version = get_vllm_version()
-    if vllm_version < parse_version("0.8.5") or vllm_version > parse_version("0.10.2"):
+    if vllm_version < parse_version("0.8.5") or vllm_version > parse_version("0.11.0"):
         raise ValueError(
             f"Unsupported vllm version: {vllm.__version__}. "
-            "This patch requires vllm version >= 0.8.5, <= 0.10.2."
+            "This patch requires vllm version >= 0.8.5, <= 0.11.0."
         )

     parser = FlexibleArgumentParser(description="Run the OpenAI API server.")
@@ -371,5 +371,6 @@ async def run_api_server_in_ray_actor(
     if reasoning_parser:
         cli_args.extend(["--reasoning-parser", reasoning_parser])
     args = parser.parse_args(cli_args)
-    print(args)
+    if vllm_version >= parse_version("0.11.0"):
+        args.structured_outputs_config.reasoning_parser = reasoning_parser
     await run_server_in_ray(args, async_llm)
diff --git a/trinity/common/models/vllm_model.py b/trinity/common/models/vllm_model.py
index 9436bc7e06..5b22dddb85 100644
--- a/trinity/common/models/vllm_model.py
+++ b/trinity/common/models/vllm_model.py
@@ -91,6 +91,8 @@ def __init__(
             engine_args.enable_log_requests = False
         else:
             engine_args.disable_log_requests = True
+        if get_vllm_version() >= parse_version("0.11.0"):
+            engine_args.reasoning_parser = config.reasoning_parser
         self.async_llm = vllm.AsyncLLMEngine.from_engine_args(engine_args)
         self.processor = None
         self.tokenizer = None
@@ -107,12 +109,7 @@ def __init__(

     async def _initialize_tokenizer(self):
         if self.tokenizer is None:
-            if self.enable_lora:
-                self.tokenizer = await self.async_llm.get_tokenizer(
-                    lora_request=self.get_lora_request()
-                )
-            else:
-                self.tokenizer = await self.async_llm.get_tokenizer()
+            self.tokenizer = await self.async_llm.get_tokenizer()
         self.tokenizer.truncation_side = "left"

     def _initialize_processor(self):
diff --git a/trinity/explorer/scheduler.py b/trinity/explorer/scheduler.py
index fecde5e61b..ae17649c86 100644
--- a/trinity/explorer/scheduler.py
+++ b/trinity/explorer/scheduler.py
@@ -52,7 +52,7 @@ def _create_runner(self):
         return (
             ray.remote(WorkflowRunner)
             .options(
-                num_cpus=1,
+                num_cpus=0,
                 namespace=self.namespace,
                 scheduling_strategy="SPREAD",
                 runtime_env={
@@ -153,7 +153,7 @@ def need_save(self) -> bool:
diff --git a/trinity/trainer/trainer.py b/trinity/trainer/trainer.py
index a0e455d50e..fcea2b111a 100644
--- a/trinity/trainer/trainer.py
+++ b/trinity/trainer/trainer.py
@@ -153,7 +153,7 @@ def need_save(self) -> bool:

     async def sync_weight(self) -> Dict:
         """Sync the model weight."""
-        self.logger.info(f"Trainer synchronizing weights at step {self.train_step_num} starting..")
+        self.logger.info(f"Trainer sync_weights at step {self.train_step_num} started.")
         metrics = {}
         with Timer(metrics, "time/sync_weight"):
             if self.config.synchronizer.sync_method == SyncMethod.NCCL:
@@ -161,7 +161,7 @@ async def sync_weight(self) -> Dict:
                     "trainer", self.train_step_num
                 )
                 if result is None:
-                    self.logger.error("Trainer synchronizing weights failed.")
+                    self.logger.error("Trainer sync_weights failed.")
                 else:
                     self.engine.sync_weight()
                 self.last_trainer_sync_step = self.train_step_num
@@ -171,7 +171,7 @@ async def sync_weight(self) -> Dict:
                 self.engine.upload_state_dict()
                 self.last_sync_step = self.train_step_num
                 await self.synchronizer.set_trainer_status.remote(RunningStatus.RUNNING)
-        self.logger.info(f"Trainer synchronizing weights at step {self.train_step_num} end.")
+        self.logger.info(f"Trainer sync_weights at step {self.train_step_num} finished.")
         return metrics

     def _log_experiences(self, samples: List[Dict]) -> None:
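Note on the scheduler change above: with num_cpus=0, each WorkflowRunner actor reserves no CPU in Ray's resource accounting, so the number of concurrent runners is no longer capped by the cluster's advertised CPU count. A minimal standalone sketch of the pattern (the class body is a hypothetical stand-in):

    import ray

    @ray.remote
    class WorkflowRunner:
        def run(self):
            return "ok"

    # num_cpus=0: the actor consumes no CPU resource at placement time,
    # so Ray can schedule it even on a fully reserved node; SPREAD still
    # distributes runners across nodes.
    runner = WorkflowRunner.options(num_cpus=0, scheduling_strategy="SPREAD").remote()
    print(ray.get(runner.run.remote()))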