Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/docker.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ env:
jobs:
build-and-push-image:
runs-on: self-hosted
timeout-minutes: 240 # wait up to 4 hours
timeout-minutes: 480 # wait up to 8 hours
# Sets the permissions granted to the `GITHUB_TOKEN` for the actions in this job.
permissions:
contents: read
Expand Down Expand Up @@ -61,8 +61,8 @@ jobs:
with:
context: trinity-${{ github.run_id }}
push: true
file: trinity-${{ github.run_id }}/scripts/docker/Dockerfile
shm-size: 64g
file: trinity-${{ github.run_id }}/scripts/docker/Dockerfile.uv
shm-size: 128g
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}

Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/docker/docker-compose.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
services:
trinity-node-1:
image: trinity-rft-unittest:20251030
image: trinity-rft-unittest:20251225
pull_policy: never
command: sh -c "pip install -e .[dev] && ray start --head --dashboard-host 0.0.0.0 --include-dashboard true --block"
environment:
Expand Down Expand Up @@ -29,7 +29,7 @@ services:
capabilities: [gpu]

trinity-node-2:
image: trinity-rft-unittest:20251030
image: trinity-rft-unittest:20251225
pull_policy: never
command: sh -c "pip install -e .[dev] && ray start --address=trinity-node-1:6379 --block"
environment:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/pre-commit.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,5 @@ jobs:
- uses: actions/checkout@v4
- uses: actions/setup-python@v4
with:
python-version: '3.10'
python-version: '3.12'
- uses: pre-commit/[email protected]
8 changes: 4 additions & 4 deletions .github/workflows/unittest.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ jobs:
MAX_RETRIES=20
RETRY_INTERVAL=5
for i in $(seq 1 $MAX_RETRIES); do
docker compose exec trinity-node-1 ray status && docker compose exec trinity-node-2 ray status && break
docker compose exec trinity-node-1 bash -c "source /opt/venv/bin/activate && ray status" && docker compose exec trinity-node-2 bash -c "source /opt/venv/bin/activate && ray status" && break
echo "Waiting for ray cluster to be ready... ($i/$MAX_RETRIES)"
sleep $RETRY_INTERVAL
if [ "$i" -eq "$MAX_RETRIES" ]; then
Expand Down Expand Up @@ -76,12 +76,12 @@ jobs:
TYPE="${{ steps.test_type.outputs.type }}"
if [ "$TYPE" = "all" ]; then
echo "tests_run=true" >> $GITHUB_ENV
docker compose exec trinity-node-1 pytest tests -v -s --ctrf report.json
docker compose exec trinity-node-1 bash -c "source /opt/venv/bin/activate && pytest tests -v -s --ctrf report.json"
elif [ "$TYPE" = "diff" ]; then
if [ -s ../../../test_dirs.txt ]; then
echo "tests_run=true" >> $GITHUB_ENV
TEST_DIRS=$(cat ../../../test_dirs.txt | xargs)
docker compose exec trinity-node-1 pytest $TEST_DIRS -v -s --ctrf report.json
docker compose exec trinity-node-1 bash -c "source /opt/venv/bin/activate && pytest $TEST_DIRS -v -s --ctrf report.json"
else
echo "No changed modules detected, skipping tests."
echo "tests_run=false" >> $GITHUB_ENV
Expand All @@ -90,7 +90,7 @@ jobs:
MODULE="${{ steps.test_type.outputs.module }}"
if [ -n "$MODULE" ]; then
echo "tests_run=true" >> $GITHUB_ENV
docker compose exec trinity-node-1 pytest tests/$MODULE -v -s --ctrf report.json
docker compose exec trinity-node-1 bash -c "source /opt/venv/bin/activate && pytest tests/$MODULE -v -s --ctrf report.json"
else
echo "No module specified, skipping tests."
echo "tests_run=false" >> $GITHUB_ENV
Expand Down
4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "trinity-rft"
version = "0.3.3"
version = "0.4.0"
authors = [
{name="Trinity-RFT Team", email="[email protected]"},
]
Expand Down Expand Up @@ -73,6 +73,8 @@ dev = [
]
megatron = [
"megatron-core[mlm]==0.13.1",
# If you hit an "undefined symbol" error in transformer engine,
# reinstall it with the `--no-build-isolation` and `--no-cache-dir` flags.
"transformer_engine[pytorch]==2.8.0",
"mbridge>=0.13.0",
]
Expand Down
6 changes: 4 additions & 2 deletions scripts/docker/Dockerfile.uv
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,10 @@ RUN . /opt/venv/bin/activate && \

# Install flash_attn and Megatron
RUN . /opt/venv/bin/activate && \
uv pip install flash_attn==2.8.1 --no-cache-dir && \
uv pip install -e .[megatron] && \
uv pip install flash_attn==2.8.1 --no-build-isolation && \
# Requirement specifiers are quoted so the shell passes them through verbatim:
# unquoted, `>=0.13.0` is parsed as a stdout redirect to a file named
# `=0.13.0` (dropping the version constraint), and `[...]` is a glob pattern.
uv pip install "megatron-core[mlm]==0.13.1" && \
uv pip install "transformer_engine[pytorch]==2.8.0" --no-build-isolation && \
uv pip install "mbridge>=0.13.0" && \
NVCC_APPEND_FLAGS="--threads 4" APEX_PARALLEL_BUILD=8 \
uv pip install -v --no-build-isolation \
--config-settings="--build-option=--cpp_ext" \
Expand Down
7 changes: 3 additions & 4 deletions tests/utils/swanlab_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
class TestSwanlabMonitor(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls._original_env = {
"SWANLAB_API_KEY": os.environ.get("SWANLAB_API_KEY"),
}
os.environ["SWANLAB_API_KEY"] = "xxxxxxxxxxxxxxxxxxxxx"

@classmethod
Expand All @@ -31,7 +34,3 @@ def test_swanlab_monitor_smoke(self):
# Log a minimal metric to verify basic flow
mon.log({"smoke/metric": 1.0}, step=1)
mon.close()


if __name__ == "__main__":
unittest.main()
2 changes: 1 addition & 1 deletion trinity/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# -*- coding: utf-8 -*-
"""Trinity-RFT (Reinforcement Fine-Tuning)"""

__version__ = "0.3.3"
__version__ = "0.4.0"
2 changes: 1 addition & 1 deletion trinity/common/models/vllm_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -447,7 +447,7 @@ async def convert_messages_to_experience(
if len(token_ids) > self.config.max_model_len - 1:
truncate_status = "response_truncated"
self.logger.warning(
f"Warning: {len(token_ids) = } exceeds the length limit {self.config.max_model_len-1 = }"
f"Warning: {len(token_ids)=} exceeds the length limit {(self.config.max_model_len - 1)=}"
)
token_ids = token_ids[: self.config.max_model_len - 1]
action_mask = action_mask[: self.config.max_model_len - 1]
Expand Down
Loading