Skip to content

Commit 7550664

Browse files
Merge branch 'meta-pytorch:main' into main
2 parents baeb35b + 399b20d commit 7550664

File tree

105 files changed

+3324
-2824
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

105 files changed

+3324
-2824
lines changed

.github/packaging/pre_build_cpu.sh

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,13 @@ set -euxo pipefail
44
# Builds vLLM
55
# This script builds vLLM and places its wheel into dist/.
66

7-
VLLM_BRANCH="v0.10.0"
7+
# Locate versions.sh relative to this script's own directory (the forge root is two levels up)
8+
CURRENT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
9+
echo "current dir is $CURRENT_DIR"
10+
VERSIONS_FILE="$CURRENT_DIR/../../assets/versions.sh"
11+
echo "versions file is $VERSIONS_FILE"
12+
source "$VERSIONS_FILE"
13+
814
BUILD_DIR="$HOME/forge-build"
915

1016
# Push other files to the dist folder

.github/packaging/pre_build_gpu.sh

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,13 @@ set -euxo pipefail
44
# Builds Monarch
55
# This script builds Monarch and places its wheel into dist/.
66

7-
MONARCH_COMMIT="265034a29ec3fb35919f4a9c23c65f2f4237190d"
7+
# Locate versions.sh relative to this script's own directory (the forge root is two levels up)
8+
CURRENT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
9+
echo "current dir is $CURRENT_DIR"
10+
VERSIONS_FILE="$CURRENT_DIR/../../assets/versions.sh"
11+
echo "versions file is $VERSIONS_FILE"
12+
source "$VERSIONS_FILE"
13+
814
BUILD_DIR="$HOME/forge-build"
915

1016
# Push other files to the dist folder
@@ -16,6 +22,7 @@ echo "build dir is $BUILD_DIR"
1622
echo "wheel dir is $WHL_DIR"
1723

1824
build_monarch() {
25+
export MONARCH_PACKAGE_NAME="torchmonarch"
1926
# Get Rust build related pieces
2027
if ! command -v rustup &> /dev/null; then
2128
echo "getting rustup"

.github/packaging/vllm_reqs.txt

Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
# These requirements were generated by running steps 1-3 of scripts/build_wheels.sh
2+
# then running pip freeze and manually removing the vllm dependency.
3+
# The intention of this file is to use these known requirements for a fixed
4+
# vLLM build to supplement a vLLM install from download.pytorch.org without
5+
# resorting to --extra-index-url https://download.pytorch.org/whl/nightly to find
6+
# vLLM dependencies (as this results in a ResolutionTooDeep error from pip).
7+
# See the file .github/workflows/gpu_test.yaml for an E2E forge installation using this approach.
8+
# TODO: this should be done way less hackily
9+
aiohappyeyeballs==2.6.1
10+
aiohttp==3.13.0
11+
aiosignal==1.4.0
12+
annotated-types==0.7.0
13+
anyio==4.11.0
14+
astor==0.8.1
15+
async-timeout==5.0.1
16+
attrs==25.4.0
17+
blake3==1.0.7
18+
cachetools==6.2.0
19+
cbor2==5.7.0
20+
certifi==2025.10.5
21+
cffi==2.0.0
22+
charset-normalizer==3.4.3
23+
click==8.3.0
24+
cloudpickle==3.1.1
25+
cmake==4.1.0
26+
compressed-tensors==0.10.2
27+
cupy-cuda12x==13.6.0
28+
depyf==0.19.0
29+
dill==0.4.0
30+
diskcache==5.6.3
31+
distro==1.9.0
32+
dnspython==2.8.0
33+
einops==0.8.1
34+
email-validator==2.3.0
35+
exceptiongroup==1.3.0
36+
fastapi==0.118.3
37+
fastapi-cli==0.0.13
38+
fastapi-cloud-cli==0.3.1
39+
fastrlock==0.8.3
40+
filelock==3.19.1
41+
frozenlist==1.8.0
42+
fsspec==2025.9.0
43+
gguf==0.17.1
44+
h11==0.16.0
45+
hf-xet==1.1.10
46+
httpcore==1.0.9
47+
httptools==0.7.1
48+
httpx==0.28.1
49+
huggingface-hub==0.35.3
50+
idna==3.10
51+
interegular==0.3.3
52+
Jinja2==3.1.6
53+
jiter==0.11.0
54+
jsonschema==4.25.1
55+
jsonschema-specifications==2025.9.1
56+
lark==1.2.2
57+
llguidance==0.7.30
58+
llvmlite==0.44.0
59+
lm-format-enforcer==0.10.12
60+
markdown-it-py==4.0.0
61+
MarkupSafe==3.0.2
62+
mdurl==0.1.2
63+
mistral_common==1.8.5
64+
mpmath==1.3.0
65+
msgpack==1.1.2
66+
msgspec==0.19.0
67+
multidict==6.7.0
68+
networkx==3.4.2
69+
ninja==1.13.0
70+
numba==0.61.2
71+
numpy==2.2.6
72+
nvidia-cublas-cu12==12.9.1.4
73+
nvidia-cuda-cupti-cu12==12.9.79
74+
nvidia-cuda-nvrtc-cu12==12.9.86
75+
nvidia-cuda-runtime-cu12==12.9.79
76+
nvidia-cudnn-cu12==9.10.2.21
77+
nvidia-cufft-cu12==11.4.1.4
78+
nvidia-cufile-cu12==1.14.1.1
79+
nvidia-curand-cu12==10.3.10.19
80+
nvidia-cusolver-cu12==11.7.5.82
81+
nvidia-cusparse-cu12==12.5.10.65
82+
nvidia-cusparselt-cu12==0.7.1
83+
nvidia-nccl-cu12==2.27.5
84+
nvidia-nvjitlink-cu12==12.9.86
85+
nvidia-nvshmem-cu12==3.3.20
86+
nvidia-nvtx-cu12==12.9.79
87+
openai==1.90.0
88+
opencv-python-headless==4.12.0.88
89+
outlines_core==0.2.10
90+
packaging==25.0
91+
partial-json-parser==0.2.1.1.post6
92+
pillow==11.3.0
93+
prometheus-fastapi-instrumentator==7.1.0
94+
prometheus_client==0.23.1
95+
propcache==0.4.1
96+
protobuf==6.32.1
97+
psutil==7.1.0
98+
py-cpuinfo==9.0.0
99+
pybase64==1.4.2
100+
pycountry==24.6.1
101+
pycparser==2.23
102+
pydantic==2.12.0
103+
pydantic-extra-types==2.10.6
104+
pydantic_core==2.41.1
105+
Pygments==2.19.2
106+
python-dotenv==1.1.1
107+
python-json-logger==4.0.0
108+
python-multipart==0.0.20
109+
pytorch-triton==3.4.0+gitf7888497
110+
PyYAML==6.0.3
111+
pyzmq==27.1.0
112+
ray==2.49.2
113+
referencing==0.36.2
114+
regex==2025.9.18
115+
requests==2.32.5
116+
rich==14.2.0
117+
rich-toolkit==0.15.1
118+
rignore==0.7.0
119+
rpds-py==0.27.1
120+
safetensors==0.6.2
121+
scipy==1.15.3
122+
sentencepiece==0.2.1
123+
sentry-sdk==2.41.0
124+
setuptools-scm==9.2.0
125+
shellingham==1.5.4
126+
sniffio==1.3.1
127+
soundfile==0.13.1
128+
soxr==1.0.0
129+
starlette==0.48.0
130+
sympy==1.14.0
131+
tiktoken==0.12.0
132+
tokenizers==0.22.1
133+
tomli==2.3.0
134+
torch==2.9.0.dev20250905+cu129
135+
tqdm==4.67.1
136+
transformers==4.57.0
137+
triton==3.4.0
138+
typer==0.19.2
139+
typing-inspection==0.4.2
140+
typing_extensions==4.15.0
141+
urllib3==2.5.0
142+
uvicorn==0.37.0
143+
uvloop==0.21.0
144+
watchfiles==1.1.0
145+
websockets==15.0.1
146+
xgrammar==0.1.21
147+
yarl==1.22.0

.github/workflows/build_vllm.yaml

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ permissions:
1212

1313
jobs:
1414
build:
15-
name: forge-cu128-nightly
15+
name: forge-cu129-nightly
1616
uses: pytorch/test-infra/.github/workflows/build_wheels_linux.yml@main
1717
strategy:
1818
fail-fast: false
@@ -31,13 +31,13 @@ jobs:
3131
{
3232
"python_version": "3.10",
3333
"gpu_arch_type": "cpu",
34-
"gpu_arch_version": "12.8",
35-
"desired_cuda": "cu128",
36-
"container_image": "pytorch/manylinux2_28-builder:cuda12.8",
34+
"gpu_arch_version": "12.9",
35+
"desired_cuda": "cu129",
36+
"container_image": "pytorch/manylinux2_28-builder:cuda12.9",
3737
"package_type": "manywheel",
38-
"build_name": "manywheel-py3_10-cuda12_8",
38+
"build_name": "manywheel-py3_10-cuda12_9",
3939
"validation_runner": "linux.12xlarge.memory",
40-
"installation": "pip3 install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu128",
40+
"installation": "pip3 install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu129",
4141
"channel": "nightly",
4242
"upload_to_base_bucket": "no",
4343
"stable_version": "2.8.0",

.github/workflows/build_wheels.yaml

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ permissions:
1212

1313
jobs:
1414
build:
15-
name: forge-cu128-nightly
15+
name: forge-cu129-nightly
1616
uses: pytorch/test-infra/.github/workflows/build_wheels_linux.yml@main
1717
strategy:
1818
fail-fast: false
@@ -31,13 +31,13 @@ jobs:
3131
{
3232
"python_version": "3.10",
3333
"gpu_arch_type": "cuda",
34-
"gpu_arch_version": "12.8",
35-
"desired_cuda": "cu128",
36-
"container_image": "pytorch/manylinux2_28-builder:cuda12.8",
34+
"gpu_arch_version": "12.9",
35+
"desired_cuda": "cu129",
36+
"container_image": "pytorch/manylinux2_28-builder:cuda12.9",
3737
"package_type": "manywheel",
38-
"build_name": "manywheel-py3_10-cuda12_8",
38+
"build_name": "manywheel-py3_10-cuda12_9",
3939
"validation_runner": "linux.4xlarge.nvidia.gpu",
40-
"installation": "pip3 install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu128",
40+
"installation": "pip3 install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu129",
4141
"channel": "nightly",
4242
"upload_to_base_bucket": "no",
4343
"stable_version": "2.8.0",

.github/workflows/docs.yml

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ on:
99

1010
jobs:
1111
build-docs:
12+
if: github.repository_owner == 'meta-pytorch'
1213
name: Build Documentation
1314
runs-on: linux.g5.4xlarge.nvidia.gpu
1415
timeout-minutes: 30
@@ -36,10 +37,10 @@ jobs:
3637
run: python -m pip install --upgrade pip
3738
- name: Install pytorch
3839
shell: bash -l {0}
39-
run: python -m pip install torch==2.9.0 --index-url https://download.pytorch.org/whl/test/cu130
40+
run: pip3 install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu130 --force-reinstall
4041
- name: Install monarch
4142
shell: bash -l {0}
42-
run: python -m pip install monarch-no-torch==0.1.0.dev20250826 --find-links assets/ci
43+
run: pip install assets/ci/monarch_no_torch-0.1.0.dev20251010-py3-none-any.whl
4344
- name: Install torchforge
4445
shell: bash -l {0}
4546
env:
@@ -52,9 +53,35 @@ jobs:
5253
shell: bash -l {0}
5354
working-directory: docs
5455
run: |
55-
set +e # Don't exit on error
56-
make html SPHINXOPTS="-WT --keep-going" || echo "Build completed with warnings/errors"
57-
set -e # Re-enable exit on error for subsequent commands
56+
# Set up library paths to ensure all dependencies are available
57+
# This is critical for monarch and other native dependencies that need libpython3.10.so.1.0
58+
export LD_LIBRARY_PATH="${CONDA_PREFIX}/lib:${LD_LIBRARY_PATH:-}"
59+
60+
# Also set CUDA paths if needed
61+
if [ -d "/usr/local/cuda-12.9" ]; then
62+
export LD_LIBRARY_PATH="/usr/local/cuda-12.9/compat:${LD_LIBRARY_PATH}"
63+
export CUDA_HOME=/usr/local/cuda-12.9
64+
fi
65+
66+
# Verify dependencies can be imported before building docs
67+
echo "Verifying dependencies..."
68+
python -c "import forge; print('✓ forge imported successfully')"
69+
python -c "import monarch; print('✓ monarch imported successfully')"
70+
71+
# Build docs with -W (warnings as errors) and --keep-going to see all issues
72+
# Capture exit code but continue to see all errors
73+
set +e
74+
make html SPHINXOPTS="-W --keep-going"
75+
BUILD_EXIT_CODE=$?
76+
set -e
77+
78+
# Report results
79+
if [ $BUILD_EXIT_CODE -ne 0 ]; then
80+
echo "❌ Documentation build failed with warnings or errors (exit code: $BUILD_EXIT_CODE)"
81+
exit $BUILD_EXIT_CODE
82+
else
83+
echo "✅ Documentation build completed successfully with no warnings or errors"
84+
fi
5885
- name: Upload docs artifact
5986
uses: actions/upload-artifact@v4
6087
with:

.github/workflows/gpu_test.yaml

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
name: GPU tests
2+
3+
on:
4+
schedule:
5+
# Runs daily at 00:00 UTC (GitHub Actions evaluates cron schedules in UTC)
6+
- cron: '0 0 * * *'
7+
push:
8+
branches: [ main ]
9+
pull_request:
10+
workflow_dispatch:
11+
12+
concurrency:
13+
group: gpu-test-${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_number || github.ref }}
14+
cancel-in-progress: true
15+
16+
permissions:
17+
id-token: write
18+
contents: read
19+
20+
defaults:
21+
run:
22+
shell: bash -l -eo pipefail {0}
23+
24+
jobs:
25+
gpu_test:
26+
if: github.repository_owner == 'meta-pytorch'
27+
runs-on: linux.g5.12xlarge.nvidia.gpu
28+
strategy:
29+
matrix:
30+
python-version: ['3.10']
31+
steps:
32+
- name: Check out repo
33+
uses: actions/checkout@v4
34+
- name: Setup conda env
35+
uses: conda-incubator/setup-miniconda@v2
36+
with:
37+
auto-update-conda: true
38+
miniconda-version: "latest"
39+
activate-environment: test
40+
python-version: ${{ matrix.python-version }}
41+
- name: Update pip
42+
run: python -m pip install --upgrade pip
43+
- name: Install pinned torch nightly
44+
run: python -m pip install --pre torch==2.9.0.dev20250905 --no-cache-dir --index-url https://download.pytorch.org/whl/nightly/cu129
45+
- name: Download and install vLLM and its dependencies
46+
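# TODO: replace this workaround - the pinned requirements are installed first so that vLLM's dependencies are already present when its wheel is installed from the forge index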
47+
run: |
48+
python -m pip install -r .github/packaging/vllm_reqs.txt
49+
python -m pip install vllm==0.10.1.dev0+g6d8d0a24c.d20251009.cu129 --no-cache-dir --index-url https://download.pytorch.org/whl/preview/forge
50+
- name: Install Monarch
51+
run: pip install torchmonarch==0.1.0rc1
52+
- name: Install torchtitan and torchstore
53+
run: |
54+
python -m pip install git+https://github.com/pytorch/torchtitan.git
55+
python -m pip install git+https://github.com/meta-pytorch/torchstore.git
56+
- name: Install dependencies
57+
run: python -m pip install --no-build-isolation -e ".[dev]"
58+
- name: Run unit tests with coverage
59+
# TODO add all tests
60+
run: |
61+
export LD_PRELOAD=$CONDA/envs/test/lib/libpython3.10.so.1.0
62+
export LD_LIBRARY_PATH=$CONDA/envs/test/lib/libpython3.10.so.1.0
63+
pytest tests/unit_tests --cov=. --cov-report=xml --durations=20 -vv
64+
- name: Upload Coverage to Codecov
65+
uses: codecov/codecov-action@v3

.github/workflows/unit_test.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ jobs:
2626
- name: Install pytorch
2727
run: python -m pip install torch==2.9.0.dev20250826 --extra-index-url https://download.pytorch.org/whl/nightly/cpu
2828
- name: Install monarch
29-
run: python -m pip install monarch-no-torch==0.1.0.dev20250826 --find-links assets/ci
29+
run: pip install assets/ci/monarch_no_torch-0.1.0.dev20251010-py3-none-any.whl
3030
- name: Install torchstore
3131
run: pip install assets/wheels/torchstore-0.1.0-py3-none-any.whl
3232
- name: Install torchtitan

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ docs/source/generated_examples/
153153
docs/source/gen_modules/
154154
docs/source/generated/
155155
docs/source/sg_execution_times.rst
156-
docs/source/tutorials
156+
docs/source/tutorials/*
157157
# pytorch-sphinx-theme gets installed here
158158
docs/src
159159

0 commit comments

Comments
 (0)