Skip to content

Commit b922ec7

Browse files
Consolidate on install.sh, move to stable packages (#474)
Co-authored-by: joecummings <[email protected]>
1 parent 0c67e40 commit b922ec7

File tree

15 files changed

+232
-652
lines changed

15 files changed

+232
-652
lines changed

.github/packaging/pre_build_cpu.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ echo "wheel dir is $WHL_DIR"
2424
build_vllm() {
2525
cd "$BUILD_DIR"
2626

27-
git clone https://github.com/vllm-project/vllm.git --branch $VLLM_BRANCH
27+
git clone https://github.com/vllm-project/vllm.git --branch $VLLM_VERSION
2828
cd "$BUILD_DIR/vllm"
2929

3030
python use_existing_torch.py

.github/packaging/vllm_reqs_12_8.txt

Lines changed: 10 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,4 @@
1-
# These requirements were generated by running steps 1-3 of scripts/build_wheels.sh
2-
# then running pip freeze and manually removing the vllm dependency.
3-
# The intention of this file is to use these known requirements for a fixed
4-
# vLLM build to supplement a vLLM install from download.pytorch.org without
5-
# resorting to --extra-index-url https://download.pytorch.org/whl/nightly to find
6-
# vLLM dependencies (as this results in a ResolutionTooDeep error from pip).
7-
# See the file .github/workflows/gpu_test.yaml for an E2E forge installation using this approach.
8-
# TODO: this should be done way less hackily
1+
# This file was generated by running ./scripts/generate_vllm_reqs.sh
92
aiohappyeyeballs==2.6.1
103
aiohttp==3.13.1
114
aiosignal==1.4.0
@@ -33,8 +26,8 @@ dnspython==2.8.0
3326
einops==0.8.1
3427
email-validator==2.3.0
3528
exceptiongroup==1.3.0
36-
fastapi==0.119.0
37-
fastapi-cli==0.0.13
29+
fastapi==0.119.1
30+
fastapi-cli==0.0.14
3831
fastapi-cloud-cli==0.3.1
3932
fastrlock==0.8.3
4033
filelock==3.19.1
@@ -94,7 +87,7 @@ prometheus-fastapi-instrumentator==7.1.0
9487
prometheus_client==0.23.1
9588
propcache==0.4.1
9689
protobuf==6.33.0
97-
psutil==7.1.0
90+
psutil==7.1.1
9891
py-cpuinfo==9.0.0
9992
pybase64==1.4.2
10093
pycountry==24.6.1
@@ -108,9 +101,9 @@ python-json-logger==4.0.0
108101
python-multipart==0.0.20
109102
PyYAML==6.0.3
110103
pyzmq==27.1.0
111-
ray==2.50.0
104+
ray==2.50.1
112105
referencing==0.37.0
113-
regex==2025.9.18
106+
regex==2025.10.23
114107
requests==2.32.5
115108
rich==14.2.0
116109
rich-toolkit==0.15.1
@@ -119,8 +112,8 @@ rpds-py==0.27.1
119112
safetensors==0.6.2
120113
scipy==1.15.3
121114
sentencepiece==0.2.1
122-
sentry-sdk==2.42.0
123-
setuptools-scm==9.2.1
115+
sentry-sdk==2.42.1
116+
setuptools-scm==9.2.2
124117
shellingham==1.5.4
125118
sniffio==1.3.1
126119
soundfile==0.13.1
@@ -134,11 +127,11 @@ torch==2.9.0+cu128
134127
tqdm==4.67.1
135128
transformers==4.57.1
136129
triton==3.5.0
137-
typer==0.19.2
130+
typer==0.20.0
138131
typing-inspection==0.4.2
139132
typing_extensions==4.15.0
140133
urllib3==2.5.0
141-
uvicorn==0.37.0
134+
uvicorn==0.38.0
142135
uvloop==0.22.1
143136
watchfiles==1.1.1
144137
websockets==15.0.1

.github/workflows/docs.yml

Lines changed: 1 addition & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -26,62 +26,19 @@ jobs:
2626
activate-environment: test
2727
python-version: '3.10'
2828
auto-activate: false
29-
- name: Verify conda environment
30-
shell: bash -l {0}
31-
run: |
32-
conda info
33-
which python
34-
which conda
3529
- name: Update pip
3630
shell: bash -l {0}
3731
run: python -m pip install --upgrade pip
38-
- name: Install pytorch
39-
shell: bash -l {0}
40-
run: pip3 install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu130 --force-reinstall
41-
- name: Install monarch
42-
shell: bash -l {0}
43-
run: pip install assets/ci/monarch_no_torch-0.1.0.dev20251010-py3-none-any.whl
4432
- name: Install torchforge
4533
shell: bash -l {0}
46-
env:
47-
GH_TOKEN: ${{ github.token }}
4834
run: ./scripts/install.sh
4935
- name: Install docs dependencies
5036
shell: bash -l {0}
5137
run: python -m pip install -r docs/requirements.txt
5238
- name: Build docs
5339
shell: bash -l {0}
5440
working-directory: docs
55-
run: |
56-
# Set up library paths to ensure all dependencies are available
57-
# This is critical for monarch and other native dependencies that need libpython3.10.so.1.0
58-
export LD_LIBRARY_PATH="${CONDA_PREFIX}/lib:${LD_LIBRARY_PATH:-}"
59-
60-
# Also set CUDA paths if needed
61-
if [ -d "/usr/local/cuda-12.9" ]; then
62-
export LD_LIBRARY_PATH="/usr/local/cuda-12.9/compat:${LD_LIBRARY_PATH}"
63-
export CUDA_HOME=/usr/local/cuda-12.9
64-
fi
65-
66-
# Verify dependencies can be imported before building docs
67-
echo "Verifying dependencies..."
68-
python -c "import forge; print('✓ torchforge imported successfully')"
69-
python -c "import monarch; print('✓ monarch imported successfully')"
70-
71-
# Build docs with -W (warnings as errors) and --keep-going to see all issues
72-
# Capture exit code but continue to see all errors
73-
set +e
74-
make html SPHINXOPTS="-W --keep-going"
75-
BUILD_EXIT_CODE=$?
76-
set -e
77-
78-
# Report results
79-
if [ $BUILD_EXIT_CODE -ne 0 ]; then
80-
echo "❌ Documentation build failed with warnings or errors (exit code: $BUILD_EXIT_CODE)"
81-
exit $BUILD_EXIT_CODE
82-
else
83-
echo "✅ Documentation build completed successfully with no warnings or errors"
84-
fi
41+
run: make html
8542
- name: Upload docs artifact
8643
uses: actions/upload-artifact@v4
8744
with:

.github/workflows/gpu_test.yaml

Lines changed: 5 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,6 @@
1-
name: GPU Tests
1+
name: Unit Tests (GPU)
22

33
on:
4-
schedule:
5-
# Runs at midnight every day
6-
- cron: '0 0 * * *'
74
push:
85
branches: [ main ]
96
pull_request:
@@ -27,7 +24,7 @@ jobs:
2724
runs-on: linux.g5.12xlarge.nvidia.gpu
2825
strategy:
2926
matrix:
30-
python-version: ['3.10']
27+
python-version: ['3.10', '3.11', '3.12']
3128
steps:
3229
- name: Check out repo
3330
uses: actions/checkout@v4
@@ -40,26 +37,10 @@ jobs:
4037
python-version: ${{ matrix.python-version }}
4138
- name: Update pip
4239
run: python -m pip install --upgrade pip
43-
- name: Install pinned torch nightly
44-
run: python -m pip install --pre torch==2.9.0.dev20250905 --no-cache-dir --index-url https://download.pytorch.org/whl/nightly/cu129
45-
- name: Download and install vLLM and its dependencies
46-
# TODO: this honestly could not be hackier if I tried
47-
run: |
48-
python -m pip install -r .github/packaging/vllm_reqs_12_9.txt
49-
python -m pip install vllm==0.10.1.dev0+g6d8d0a24c.d20251009.cu129 --no-cache-dir --index-url https://download.pytorch.org/whl/preview/forge
50-
- name: Install Monarch
51-
run: pip install torchmonarch==0.1.0rc1
52-
- name: Install torchtitan and torchstore
53-
run: |
54-
python -m pip install git+https://github.com/pytorch/torchtitan.git
55-
python -m pip install git+https://github.com/meta-pytorch/torchstore.git
56-
- name: Install dependencies
57-
run: python -m pip install --no-build-isolation -e ".[dev]"
40+
- name: Install torchforge
41+
run: ./scripts/install.sh
5842
- name: Run unit tests with coverage
5943
# TODO add all tests
60-
run: |
61-
export LD_PRELOAD=$CONDA/envs/test/lib/libpython3.10.so.1.0
62-
export LD_LIBRARY_PATH=$CONDA/envs/test/lib/libpython3.10.so.1.0
63-
pytest tests/unit_tests --cov=. --cov-report=xml --durations=20 -vv
44+
run: pytest tests/unit_tests --cov=. --cov-report=xml --durations=20 -vv
6445
- name: Upload Coverage to Codecov
6546
uses: codecov/codecov-action@v3

.github/workflows/unit_test.yaml

Lines changed: 0 additions & 44 deletions
This file was deleted.

.meta/mast/env_setup.sh

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -150,10 +150,10 @@ if [ -f "$VERSIONS_FILE" ]; then
150150
log_info "Sourcing version information from: $VERSIONS_FILE"
151151
source "$VERSIONS_FILE"
152152

153-
if [ -n "$TORCHTITAN_COMMIT" ]; then
154-
log_info "Installing torchtitan from commit: $TORCHTITAN_COMMIT"
153+
if [ -n "$TORCHTITAN_COMMIT_MAST" ]; then
154+
log_info "Installing torchtitan from commit: $TORCHTITAN_COMMIT_MAST"
155155
pip uninstall -y torchtitan
156-
pip install "git+https://github.com/pytorch/torchtitan.git@$TORCHTITAN_COMMIT"
156+
pip install "git+https://github.com/pytorch/torchtitan.git@$TORCHTITAN_COMMIT_MAST"
157157

158158
if [ $? -eq 0 ]; then
159159
log_info "Torchtitan installed successfully"
@@ -162,7 +162,7 @@ if [ -f "$VERSIONS_FILE" ]; then
162162
exit 1
163163
fi
164164
else
165-
log_error "TORCHTITAN_COMMIT not found in versions.sh"
165+
log_error "TORCHTITAN_COMMIT_MAST not found in versions.sh"
166166
exit 1
167167
fi
168168
else

README.md

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
# <img width="35" height="35" alt="image" src="https://github.com/user-attachments/assets/2700a971-e5d6-4036-b03f-2f89c9791609" /> torchforge
22

33
#### A PyTorch-native agentic RL library that lets you focus on algorithms—not infra.
4-
[![Unit Tests](https://github.com/meta-pytorch/forge/actions/workflows/unit_test.yaml/badge.svg?branch=main)](https://github.com/meta-pytorch/forge/actions/workflows/unit_test.yaml?query=branch%3Amain)
54
[![GPU Tests](https://github.com/meta-pytorch/forge/actions/workflows/gpu_test.yaml/badge.svg?branch=main)](https://github.com/meta-pytorch/forge/actions/workflows/gpu_test.yaml?query=branch%3Amain)
65
[![Documentation](https://img.shields.io/badge/Docs-meta--pytorch.org-blue?style=flat&logo=readthedocs&logoColor=white)](https://meta-pytorch.org/torchforge/)
76
[![Discord](https://img.shields.io/badge/Discord-OpenEnv-7289da?style=flat&logo=discord&logoColor=white)](https://discord.gg/YsTYBh6PD9)
@@ -33,14 +32,11 @@ You can also find our notebook tutorials (coming soon)
3332

3433
### Basic
3534

36-
torchforge requires the latest PyTorch nightly with [Monarch](https://github.com/meta-pytorch/monarch), [vLLM](https://github.com/vllm-project/vllm), and [torchtitan](https://github.com/pytorch/torchtitan). For convenience,
37-
we have pre-packaged these dependencies as wheels in assets/wheels. (Note that the basic install script
35+
torchforge requires PyTorch 2.9.0 with [Monarch](https://github.com/meta-pytorch/monarch), [vLLM](https://github.com/vllm-project/vllm), and [torchtitan](https://github.com/pytorch/torchtitan). (Note that the basic install script
3836
uses [DNF](https://docs.fedoraproject.org/en-US/quick-docs/dnf/), but could be easily extended to other Linux distributions.)
3937

40-
torchforge requires the GitHub CLI (gh) to download a compatible vLLM package. See [here](https://github.com/cli/cli#installation) for gh install instructions before continuing. Please log in to gh with your GitHub account by running `gh auth login` before continuing. You may use either HTTPS or SSH as the protocol for authentication.
41-
4238
```bash
43-
conda create -n forge python=3.10
39+
conda create -n forge python=3.12
4440
conda activate forge
4541
./scripts/install.sh
4642
```
@@ -53,11 +49,6 @@ After install, you can run the following command and should see output confirmin
5349
python -m apps.grpo.main --config apps/grpo/qwen3_1_7b.yaml
5450
```
5551

56-
If you need to re-build the wheels for whatever reason, you can do so with:
57-
```bash
58-
./scripts/build_wheels.sh
59-
```
60-
6152
## Quick Start
6253

6354
To run SFT on a Llama3 8B model, run

apps/grpo/main.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -59,17 +59,15 @@ def policy_version(self) -> int | None:
5959

6060
@property
6161
def request_tensor(self) -> torch.Tensor:
62-
request_tokens: torch.Tensor = self.completion.prompt_ids
63-
tensor = torch.tensor(request_tokens, dtype=torch.long)
62+
tensor: torch.Tensor = self.completion.prompt_ids.to(torch.long)
6463
if tensor.shape[0] < self.request_len: # left pad
6564
diff = self.request_len - tensor.shape[0]
6665
tensor = F.pad(tensor, (diff, 0), value=self.pad_id)
6766
return tensor
6867

6968
@property
7069
def response_tensor(self) -> torch.Tensor:
71-
response_tokens: torch.Tensor = self.completion.token_ids
72-
tensor = torch.tensor(response_tokens, dtype=torch.long)
70+
tensor: torch.Tensor = self.completion.token_ids.to(torch.long)
7371
if tensor.shape[0] < self.response_len: # right pad
7472
diff = self.response_len - tensor.shape[0]
7573
tensor = F.pad(tensor, (0, diff), value=self.pad_id)

assets/versions.sh

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,12 @@
77
# Version Configuration for Forge Wheel Building
88
# This file contains all pinned versions and commits for dependencies
99

10-
# PyTorch version
11-
PYTORCH_VERSION="2.9.0.dev20250905"
10+
# Stable versions of upstream libraries for OSS repo
11+
PYTORCH_VERSION="2.9.0"
12+
VLLM_VERSION="v0.10.0"
13+
MONARCH_VERSION="0.1.0rc8"
14+
TORCHTITAN_VERSION="0.2.0"
15+
TORCHSTORE_VERSION="0.0.1.rc3"
1216

13-
# vLLM branch
14-
VLLM_BRANCH="v0.10.0"
15-
16-
# Commit hashes
17-
MONARCH_COMMIT="195503223b5c2896846171f60ac99dc6868f8f2c"
18-
TORCHTITAN_COMMIT="d0e25450bcac2332359b13fbda430dc701f073d4"
19-
TORCHSTORE_COMMIT="662299faf4fd50ee30bd9aa3f4ce8c0e2db1d310"
17+
# Torchtitan commit hash for launching on MAST
18+
TORCHTITAN_COMMIT_MAST="d0e25450bcac2332359b13fbda430dc701f073d4"
-44.6 MB
Binary file not shown.

0 commit comments

Comments
 (0)