From 9d100819e6752d711033a0ce4fa0bf011d5234d5 Mon Sep 17 00:00:00 2001 From: Evan Smothers Date: Thu, 2 Oct 2025 11:36:24 -0700 Subject: [PATCH 01/17] recreate wheel build PR --- .github/packaging/post_build_script.sh | 12 +++ .github/packaging/pre_build_cpu.sh | 34 +++++++ .github/packaging/pre_build_gpu.sh | 117 +++++++++++++++++++++++++ .github/workflows/build_vllm.yaml | 50 +++++++++++ .github/workflows/build_wheels.yaml | 50 +++++++++++ src/forge/__init__.py | 2 + version.txt | 1 + 7 files changed, 266 insertions(+) create mode 100644 .github/packaging/post_build_script.sh create mode 100644 .github/packaging/pre_build_cpu.sh create mode 100644 .github/packaging/pre_build_gpu.sh create mode 100644 .github/workflows/build_vllm.yaml create mode 100644 .github/workflows/build_wheels.yaml create mode 100644 version.txt diff --git a/.github/packaging/post_build_script.sh b/.github/packaging/post_build_script.sh new file mode 100644 index 000000000..e1845ac93 --- /dev/null +++ b/.github/packaging/post_build_script.sh @@ -0,0 +1,12 @@ +#!/bin/bash +set -euxo pipefail + +FORGE_WHEEL=${GITHUB_WORKSPACE}/${REPOSITORY}/dist/*.whl +WHL_DIR="${GITHUB_WORKSPACE}/wheels/" +DIST=dist/ + +echo "Uploading wheels to S3" +ls -l "${WHL_DIR}" +ls ${FORGE_WHEEL} +echo "Copying files from $WHL_DIR to $DIST" +mkdir -p $DIST && cp $WHL_DIR/* $DIST diff --git a/.github/packaging/pre_build_cpu.sh b/.github/packaging/pre_build_cpu.sh new file mode 100644 index 000000000..4bc843701 --- /dev/null +++ b/.github/packaging/pre_build_cpu.sh @@ -0,0 +1,34 @@ +#!/bin/bash +set -euxo pipefail + +# Builds vLLM, Monarch and torchstore +# This script builds vLLM, Monarch and torchstore and places +# their wheels into dist/. + +VLLM_BRANCH="v0.10.0" +BUILD_DIR="$HOME/forge-build" + +# Push other files to the dist folder +WHL_DIR="${GITHUB_WORKSPACE}/wheels/dist" + +mkdir -p $BUILD_DIR +mkdir -p $WHL_DIR +echo "build dir is $BUILD_DIR" +echo "wheel dir is $WHL_DIR" + +build_vllm() { + cd "$BUILD_DIR" + + git clone https://github.com/vllm-project/vllm.git --branch $VLLM_BRANCH + cd "$BUILD_DIR/vllm" + + python use_existing_torch.py + pip install -r requirements/build.txt + export VERBOSE=1 + export CMAKE_VERBOSE_MAKEFILE=1 + export FORCE_CMAKE=1 + pip wheel -v --no-build-isolation --no-deps . -w "$WHL_DIR" +} + + +build_vllm \ No newline at end of file diff --git a/.github/packaging/pre_build_gpu.sh b/.github/packaging/pre_build_gpu.sh new file mode 100644 index 000000000..9ae2b8ff9 --- /dev/null +++ b/.github/packaging/pre_build_gpu.sh @@ -0,0 +1,117 @@ +#!/bin/bash +set -euxo pipefail + +# Builds vLLM, Monarch and torchstore +# This script builds vLLM, Monarch and torchstore and places +# their wheels into dist/. + +MONARCH_COMMIT="265034a29ec3fb35919f4a9c23c65f2f4237190d" +TORCHTITAN_COMMIT="82f0287b966f1735819a377a9a09e7a303c55faa" +TORCHSTORE_COMMIT="main" +BUILD_DIR="$HOME/forge-build" + +# Push other files to the dist folder +WHL_DIR="${GITHUB_WORKSPACE}/wheels/dist" + +mkdir -p $BUILD_DIR +mkdir -p $WHL_DIR +echo "build dir is $BUILD_DIR" +echo "wheel dir is $WHL_DIR" + +build_vllm() { + cd "$BUILD_DIR" + + git clone https://github.com/vllm-project/vllm.git --branch $VLLM_BRANCH + cd "$BUILD_DIR/vllm" + + python use_existing_torch.py + pip install -r requirements/build.txt + export VERBOSE=1 + export CMAKE_VERBOSE_MAKEFILE=1 + export FORCE_CMAKE=1 + # export MAX_JOBS=2 # don't resource starve the host + # export CMAKE_BUILD_PARALLEL_LEVEL=2 + # export MAKEFLAGS=-j2 + pip wheel -v --no-build-isolation --no-deps . -w "$WHL_DIR" +} + +build_monarch() { + # Get Rust build related pieces + if ! command -v rustup &> /dev/null; then + echo "getting rustup" + curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + export PATH="$HOME/.cargo/bin:$PATH" + echo "$HOME/.cargo/bin" >> $GITHUB_PATH + fi + + rustup toolchain install nightly + rustup default nightly + + if command -v dnf &>/dev/null; then + dnf install -y clang-devel \ + libibverbs rdma-core libmlx5 libibverbs-devel rdma-core-devel fmt-devel \ + libunwind-devel + elif command -v apt-get &>/dev/null; then + apt-get update + apt-get install -y clang libunwind-dev \ + libibverbs-dev librdmacm-dev libfmt-dev + fi + + cd "$BUILD_DIR" + git clone https://github.com/meta-pytorch/monarch.git + cd "$BUILD_DIR/monarch" + git checkout $MONARCH_COMMIT + + pip install -r build-requirements.txt + export USE_TENSOR_ENGINE=1 + export RUST_BACKTRACE=1 + export CARGO_TERM_VERBOSE=true + export CARGO_TERM_COLOR=always + pip wheel --no-build-isolation --no-deps . -w "$WHL_DIR" +} + +build_torchtitan() { + cd "$BUILD_DIR" + git clone https://github.com/pytorch/torchtitan.git + cd "$BUILD_DIR/torchtitan" + git checkout $TORCHTITAN_COMMIT + + pip wheel --no-deps . -w "$WHL_DIR" +} + +build_torchstore() { + cd "$BUILD_DIR" + if [ -d "torchstore" ]; then + log_warn "torchstore directory exists, removing..." + rm -rf torchstore + fi + + git clone https://github.com/meta-pytorch/torchstore.git + cd "$BUILD_DIR/torchstore" + git checkout $TORCHSTORE_COMMIT + + pip wheel --no-deps . -w "$WHL_DIR" +} + + +append_date() { + cd ${GITHUB_WORKSPACE}/${REPOSITORY} + # Appends the current date and time to the Forge wheel + version_file="assets/version.txt" + init_file="src/forge/__init__.py" + if [[ -n "$BUILD_VERSION" ]]; then + # Update the version in version.txt + echo "$BUILD_VERSION" > "$version_file" + # Create a variable named __version__ at the end of __init__.py + echo "__version__ = \"$BUILD_VERSION\"" >> "$init_file" + else + echo "Error: BUILD_VERSION environment variable is not set or empty." + exit 1 + fi +} + + +build_monarch +# build_torchstore +# build_torchtitan +append_date \ No newline at end of file diff --git a/.github/workflows/build_vllm.yaml b/.github/workflows/build_vllm.yaml new file mode 100644 index 000000000..088e0fd9e --- /dev/null +++ b/.github/workflows/build_vllm.yaml @@ -0,0 +1,50 @@ +name: Build pinned vLLM against PyTorch nightly and upload + +on: + pull_request: + push: + branches: + - nightly + workflow_dispatch: + +permissions: + id-token: write + contents: read + +jobs: + build: + # if: github.repository_owner == 'pytorch' + name: forge-cu129-nightly + uses: pytorch/test-infra/.github/workflows/build_wheels_linux.yml@main + strategy: + fail-fast: false + with: + repository: meta-pytorch/forge + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + package-name: forge + build-matrix: | + { + "include": [ + { + "python_version": "3.10", + "gpu_arch_type": "cpu", + "gpu_arch_version": "12.9", + "desired_cuda": "cu129", + "container_image": "pytorch/manylinux2_28-builder:cuda12.9", + "package_type": "manywheel", + "build_name": "manywheel-py3_10-cuda12_9", + "validation_runner": "linux.12xlarge.memory", + "installation": "pip3 install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu129", + "channel": "nightly", + "upload_to_base_bucket": "no", + "stable_version": "2.8.0", + "use_split_build": false + } + ] + } + pre-script: .github/packaging/pre_build_cpu.sh + post-script: .github/packaging/post_build_script.sh + trigger-event: ${{ github.event_name }} + build-platform: 'python-build-package' \ No newline at end of file diff --git a/.github/workflows/build_wheels.yaml b/.github/workflows/build_wheels.yaml new file mode 100644 index 000000000..088e0fd9e --- /dev/null +++ b/.github/workflows/build_wheels.yaml @@ -0,0 +1,50 @@ +name: Build pinned vLLM against PyTorch nightly and upload + +on: + pull_request: + push: + branches: + - nightly + workflow_dispatch: + +permissions: + id-token: write + contents: read + +jobs: + build: + # if: github.repository_owner == 'pytorch' + name: forge-cu129-nightly + uses: pytorch/test-infra/.github/workflows/build_wheels_linux.yml@main + strategy: + fail-fast: false + with: + repository: meta-pytorch/forge + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + package-name: forge + build-matrix: | + { + "include": [ + { + "python_version": "3.10", + "gpu_arch_type": "cpu", + "gpu_arch_version": "12.9", + "desired_cuda": "cu129", + "container_image": "pytorch/manylinux2_28-builder:cuda12.9", + "package_type": "manywheel", + "build_name": "manywheel-py3_10-cuda12_9", + "validation_runner": "linux.12xlarge.memory", + "installation": "pip3 install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu129", + "channel": "nightly", + "upload_to_base_bucket": "no", + "stable_version": "2.8.0", + "use_split_build": false + } + ] + } + pre-script: .github/packaging/pre_build_cpu.sh + post-script: .github/packaging/post_build_script.sh + trigger-event: ${{ github.event_name }} + build-platform: 'python-build-package' \ No newline at end of file diff --git a/src/forge/__init__.py b/src/forge/__init__.py index 17d7bd153..b359f9c5b 100644 --- a/src/forge/__init__.py +++ b/src/forge/__init__.py @@ -4,6 +4,8 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. +__version__ = "" + # Enables faster downloading. For more info: https://huggingface.co/docs/huggingface_hub/en/guides/download#faster-downloads # To disable, run `HF_HUB_ENABLE_HF_TRANSFER=0 tune download ` try: diff --git a/version.txt b/version.txt new file mode 100644 index 000000000..6e8bf73aa --- /dev/null +++ b/version.txt @@ -0,0 +1 @@ +0.1.0 From c2215afa7e766aba621153671c4e1146bd0ca79d Mon Sep 17 00:00:00 2001 From: Evan Smothers Date: Thu, 2 Oct 2025 13:04:12 -0700 Subject: [PATCH 02/17] fix copy-paste error --- .github/workflows/build_wheels.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build_wheels.yaml b/.github/workflows/build_wheels.yaml index 088e0fd9e..85f27556a 100644 --- a/.github/workflows/build_wheels.yaml +++ b/.github/workflows/build_wheels.yaml @@ -1,4 +1,4 @@ -name: Build pinned vLLM against PyTorch nightly and upload +name: Build nightly wheels and publish to PyTorch Index on: pull_request: @@ -29,13 +29,13 @@ jobs: "include": [ { "python_version": "3.10", - "gpu_arch_type": "cpu", + "gpu_arch_type": "cuda", "gpu_arch_version": "12.9", "desired_cuda": "cu129", "container_image": "pytorch/manylinux2_28-builder:cuda12.9", "package_type": "manywheel", "build_name": "manywheel-py3_10-cuda12_9", - "validation_runner": "linux.12xlarge.memory", + "validation_runner": "linux.4xlarge.nvidia.gpu", "installation": "pip3 install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu129", "channel": "nightly", "upload_to_base_bucket": "no", @@ -44,7 +44,7 @@ jobs: } ] } - pre-script: .github/packaging/pre_build_cpu.sh + pre-script: .github/packaging/pre_build_gpu.sh post-script: .github/packaging/post_build_script.sh trigger-event: ${{ github.event_name }} build-platform: 'python-build-package' \ No newline at end of file From 28af813d1afdd08ad8bd2e272b9f47d9efc09a6f Mon Sep 17 00:00:00 2001 From: Evan Smothers Date: Thu, 2 Oct 2025 13:56:53 -0700 Subject: [PATCH 03/17] cp -r --- .github/packaging/post_build_script.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/packaging/post_build_script.sh b/.github/packaging/post_build_script.sh index e1845ac93..736f634c9 100644 --- a/.github/packaging/post_build_script.sh +++ b/.github/packaging/post_build_script.sh @@ -9,4 +9,4 @@ echo "Uploading wheels to S3" ls -l "${WHL_DIR}" ls ${FORGE_WHEEL} echo "Copying files from $WHL_DIR to $DIST" -mkdir -p $DIST && cp $WHL_DIR/* $DIST +mkdir -p $DIST && cp -r $WHL_DIR $DIST From be114cddee0e3ff2a9f9f998e330e0eb51b45060 Mon Sep 17 00:00:00 2001 From: Evan Smothers Date: Thu, 2 Oct 2025 15:50:58 -0700 Subject: [PATCH 04/17] add no-op smoke test --- .github/packaging/smoke_test.sh | 9 +++++++++ .github/workflows/build_vllm.yaml | 1 + .github/workflows/build_wheels.yaml | 1 + 3 files changed, 11 insertions(+) create mode 100644 .github/packaging/smoke_test.sh diff --git a/.github/packaging/smoke_test.sh b/.github/packaging/smoke_test.sh new file mode 100644 index 000000000..157ffdfed --- /dev/null +++ b/.github/packaging/smoke_test.sh @@ -0,0 +1,9 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +echo "no-op smoke test" +echo "Files in dist/ are" +ls -l dist/ diff --git a/.github/workflows/build_vllm.yaml b/.github/workflows/build_vllm.yaml index 088e0fd9e..eb00a5179 100644 --- a/.github/workflows/build_vllm.yaml +++ b/.github/workflows/build_vllm.yaml @@ -46,5 +46,6 @@ jobs: } pre-script: .github/packaging/pre_build_cpu.sh post-script: .github/packaging/post_build_script.sh + smoke-test-script: .github/packaging/smoke_test_script.sh trigger-event: ${{ github.event_name }} build-platform: 'python-build-package' \ No newline at end of file diff --git a/.github/workflows/build_wheels.yaml b/.github/workflows/build_wheels.yaml index 85f27556a..9be29d38b 100644 --- a/.github/workflows/build_wheels.yaml +++ b/.github/workflows/build_wheels.yaml @@ -46,5 +46,6 @@ jobs: } pre-script: .github/packaging/pre_build_gpu.sh post-script: .github/packaging/post_build_script.sh + smoke-test-script: .github/packaging/smoke_test_script.sh trigger-event: ${{ github.event_name }} build-platform: 'python-build-package' \ No newline at end of file From d8277ab81d096535c65bcaf7fa62e821bbd41d6a Mon Sep 17 00:00:00 2001 From: Evan Smothers Date: Thu, 2 Oct 2025 16:19:27 -0700 Subject: [PATCH 05/17] fix bad autocomplete --- .github/workflows/build_vllm.yaml | 2 +- .github/workflows/build_wheels.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_vllm.yaml b/.github/workflows/build_vllm.yaml index eb00a5179..7776a1044 100644 --- a/.github/workflows/build_vllm.yaml +++ b/.github/workflows/build_vllm.yaml @@ -46,6 +46,6 @@ jobs: } pre-script: .github/packaging/pre_build_cpu.sh post-script: .github/packaging/post_build_script.sh - smoke-test-script: .github/packaging/smoke_test_script.sh + smoke-test-script: .github/packaging/smoke_test.sh trigger-event: ${{ github.event_name }} build-platform: 'python-build-package' \ No newline at end of file diff --git a/.github/workflows/build_wheels.yaml b/.github/workflows/build_wheels.yaml index 9be29d38b..6aa503d19 100644 --- a/.github/workflows/build_wheels.yaml +++ b/.github/workflows/build_wheels.yaml @@ -46,6 +46,6 @@ jobs: } pre-script: .github/packaging/pre_build_gpu.sh post-script: .github/packaging/post_build_script.sh - smoke-test-script: .github/packaging/smoke_test_script.sh + smoke-test-script: .github/packaging/smoke_test.sh trigger-event: ${{ github.event_name }} build-platform: 'python-build-package' \ No newline at end of file From a4cef953823514478175aafe6d38c670708399cb Mon Sep 17 00:00:00 2001 From: Evan Smothers Date: Thu, 2 Oct 2025 17:11:24 -0700 Subject: [PATCH 06/17] disable smoke test in my fork --- .github/packaging/smoke_test.sh | 9 --------- .github/workflows/build_vllm.yaml | 5 ++--- .github/workflows/build_wheels.yaml | 5 ++--- 3 files changed, 4 insertions(+), 15 deletions(-) delete mode 100644 .github/packaging/smoke_test.sh diff --git a/.github/packaging/smoke_test.sh b/.github/packaging/smoke_test.sh deleted file mode 100644 index 157ffdfed..000000000 --- a/.github/packaging/smoke_test.sh +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -echo "no-op smoke test" -echo "Files in dist/ are" -ls -l dist/ diff --git a/.github/workflows/build_vllm.yaml b/.github/workflows/build_vllm.yaml index 7776a1044..3a14e1cca 100644 --- a/.github/workflows/build_vllm.yaml +++ b/.github/workflows/build_vllm.yaml @@ -21,8 +21,8 @@ jobs: with: repository: meta-pytorch/forge ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main + test-infra-repository: ebsmothers/test-infra + test-infra-ref: 5b44568eb57cb629f9d579311efd0ed6067ea234 package-name: forge build-matrix: | { @@ -46,6 +46,5 @@ jobs: } pre-script: .github/packaging/pre_build_cpu.sh post-script: .github/packaging/post_build_script.sh - smoke-test-script: .github/packaging/smoke_test.sh trigger-event: ${{ github.event_name }} build-platform: 'python-build-package' \ No newline at end of file diff --git a/.github/workflows/build_wheels.yaml b/.github/workflows/build_wheels.yaml index 6aa503d19..e0e6b6627 100644 --- a/.github/workflows/build_wheels.yaml +++ b/.github/workflows/build_wheels.yaml @@ -21,8 +21,8 @@ jobs: with: repository: meta-pytorch/forge ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main + test-infra-repository: ebsmothers/test-infra + test-infra-ref: 5b44568eb57cb629f9d579311efd0ed6067ea234 package-name: forge build-matrix: | { @@ -46,6 +46,5 @@ jobs: } pre-script: .github/packaging/pre_build_gpu.sh post-script: .github/packaging/post_build_script.sh - smoke-test-script: .github/packaging/smoke_test.sh trigger-event: ${{ github.event_name }} build-platform: 'python-build-package' \ No newline at end of file From f5c8cb560c47832d0227cf88f07f21503fd298ad Mon Sep 17 00:00:00 2001 From: Evan Smothers Date: Thu, 2 Oct 2025 20:27:39 -0700 Subject: [PATCH 07/17] idk maybe this will work? --- .github/workflows/build_vllm.yaml | 2 +- .github/workflows/build_wheels.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_vllm.yaml b/.github/workflows/build_vllm.yaml index 3a14e1cca..721603704 100644 --- a/.github/workflows/build_vllm.yaml +++ b/.github/workflows/build_vllm.yaml @@ -15,7 +15,7 @@ jobs: build: # if: github.repository_owner == 'pytorch' name: forge-cu129-nightly - uses: pytorch/test-infra/.github/workflows/build_wheels_linux.yml@main + uses: ebsmothers/test-infra/.github/workflows/build_wheels_linux.yml@main strategy: fail-fast: false with: diff --git a/.github/workflows/build_wheels.yaml b/.github/workflows/build_wheels.yaml index e0e6b6627..a282702d9 100644 --- a/.github/workflows/build_wheels.yaml +++ b/.github/workflows/build_wheels.yaml @@ -15,7 +15,7 @@ jobs: build: # if: github.repository_owner == 'pytorch' name: forge-cu129-nightly - uses: pytorch/test-infra/.github/workflows/build_wheels_linux.yml@main + uses: ebsmothers/test-infra/.github/workflows/build_wheels_linux.yml@main strategy: fail-fast: false with: From a0aba883f0532ace7475dc8f7df0e0c41ffa8be9 Mon Sep 17 00:00:00 2001 From: Evan Smothers Date: Fri, 3 Oct 2025 08:38:40 -0700 Subject: [PATCH 08/17] use new commit hash --- .github/workflows/build_vllm.yaml | 3 ++- .github/workflows/build_wheels.yaml | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_vllm.yaml b/.github/workflows/build_vllm.yaml index 721603704..5c7ad57cf 100644 --- a/.github/workflows/build_vllm.yaml +++ b/.github/workflows/build_vllm.yaml @@ -22,7 +22,8 @@ jobs: repository: meta-pytorch/forge ref: "" test-infra-repository: ebsmothers/test-infra - test-infra-ref: 5b44568eb57cb629f9d579311efd0ed6067ea234 + test-infra-ref: ff517c0858457180586230eb6105bbbdd1d05264 + run-smoke-test: false package-name: forge build-matrix: | { diff --git a/.github/workflows/build_wheels.yaml b/.github/workflows/build_wheels.yaml index a282702d9..d7a9d5022 100644 --- a/.github/workflows/build_wheels.yaml +++ b/.github/workflows/build_wheels.yaml @@ -22,7 +22,8 @@ jobs: repository: meta-pytorch/forge ref: "" test-infra-repository: ebsmothers/test-infra - test-infra-ref: 5b44568eb57cb629f9d579311efd0ed6067ea234 + test-infra-ref: ff517c0858457180586230eb6105bbbdd1d05264 + run-smoke-test: false package-name: forge build-matrix: | { From 166e5ed5123336973d5e05498e54c028a47a7504 Mon Sep 17 00:00:00 2001 From: Evan Smothers Date: Fri, 3 Oct 2025 09:56:34 -0700 Subject: [PATCH 09/17] use commit hash from main test-infra --- .github/workflows/build_vllm.yaml | 6 +++--- .github/workflows/build_wheels.yaml | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build_vllm.yaml b/.github/workflows/build_vllm.yaml index 5c7ad57cf..655f575b5 100644 --- a/.github/workflows/build_vllm.yaml +++ b/.github/workflows/build_vllm.yaml @@ -15,14 +15,14 @@ jobs: build: # if: github.repository_owner == 'pytorch' name: forge-cu129-nightly - uses: ebsmothers/test-infra/.github/workflows/build_wheels_linux.yml@main + uses: pytorch/test-infra/.github/workflows/build_wheels_linux.yml@main strategy: fail-fast: false with: repository: meta-pytorch/forge ref: "" - test-infra-repository: ebsmothers/test-infra - test-infra-ref: ff517c0858457180586230eb6105bbbdd1d05264 + test-infra-repository: pytorch/test-infra + test-infra-ref: ec36a78be63f4f7f836fbec087746d853150ffc0 run-smoke-test: false package-name: forge build-matrix: | diff --git a/.github/workflows/build_wheels.yaml b/.github/workflows/build_wheels.yaml index d7a9d5022..151da7144 100644 --- a/.github/workflows/build_wheels.yaml +++ b/.github/workflows/build_wheels.yaml @@ -15,13 +15,13 @@ jobs: build: # if: github.repository_owner == 'pytorch' name: forge-cu129-nightly - uses: ebsmothers/test-infra/.github/workflows/build_wheels_linux.yml@main + uses: pytorch/test-infra/.github/workflows/build_wheels_linux.yml@main strategy: fail-fast: false with: repository: meta-pytorch/forge ref: "" - test-infra-repository: ebsmothers/test-infra + test-infra-repository: pytorch/test-infra test-infra-ref: ff517c0858457180586230eb6105bbbdd1d05264 run-smoke-test: false package-name: forge From 3c3078d48b2966da9f78de457e91472c9f28a973 Mon Sep 17 00:00:00 2001 From: Evan Smothers Date: Fri, 3 Oct 2025 10:11:03 -0700 Subject: [PATCH 10/17] add commit hash in one more place --- .github/workflows/build_vllm.yaml | 2 +- .github/workflows/build_wheels.yaml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build_vllm.yaml b/.github/workflows/build_vllm.yaml index 655f575b5..09f27f3bd 100644 --- a/.github/workflows/build_vllm.yaml +++ b/.github/workflows/build_vllm.yaml @@ -15,7 +15,7 @@ jobs: build: # if: github.repository_owner == 'pytorch' name: forge-cu129-nightly - uses: pytorch/test-infra/.github/workflows/build_wheels_linux.yml@main + uses: pytorch/test-infra/.github/workflows/build_wheels_linux.yml@ec36a78be63f4f7f836fbec087746d853150ffc0 strategy: fail-fast: false with: diff --git a/.github/workflows/build_wheels.yaml b/.github/workflows/build_wheels.yaml index 151da7144..ef872ab9b 100644 --- a/.github/workflows/build_wheels.yaml +++ b/.github/workflows/build_wheels.yaml @@ -15,14 +15,14 @@ jobs: build: # if: github.repository_owner == 'pytorch' name: forge-cu129-nightly - uses: pytorch/test-infra/.github/workflows/build_wheels_linux.yml@main + uses: pytorch/test-infra/.github/workflows/build_wheels_linux.yml@ec36a78be63f4f7f836fbec087746d853150ffc0 strategy: fail-fast: false with: repository: meta-pytorch/forge ref: "" test-infra-repository: pytorch/test-infra - test-infra-ref: ff517c0858457180586230eb6105bbbdd1d05264 + test-infra-ref: ec36a78be63f4f7f836fbec087746d853150ffc0 run-smoke-test: false package-name: forge build-matrix: | From 1d75f28144bfed6014dfaf0dc0492f7052a6c39b Mon Sep 17 00:00:00 2001 From: Evan Smothers Date: Fri, 3 Oct 2025 11:04:42 -0700 Subject: [PATCH 11/17] debug --- .github/packaging/post_build_script.sh | 1 + .github/workflows/build_vllm.yaml | 4 ++-- .github/workflows/build_wheels.yaml | 4 ++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/packaging/post_build_script.sh b/.github/packaging/post_build_script.sh index 736f634c9..0e9c8decf 100644 --- a/.github/packaging/post_build_script.sh +++ b/.github/packaging/post_build_script.sh @@ -10,3 +10,4 @@ ls -l "${WHL_DIR}" ls ${FORGE_WHEEL} echo "Copying files from $WHL_DIR to $DIST" mkdir -p $DIST && cp -r $WHL_DIR $DIST +ls -l "${DIST}" \ No newline at end of file diff --git a/.github/workflows/build_vllm.yaml b/.github/workflows/build_vllm.yaml index 09f27f3bd..b5b6535f5 100644 --- a/.github/workflows/build_vllm.yaml +++ b/.github/workflows/build_vllm.yaml @@ -15,14 +15,14 @@ jobs: build: # if: github.repository_owner == 'pytorch' name: forge-cu129-nightly - uses: pytorch/test-infra/.github/workflows/build_wheels_linux.yml@ec36a78be63f4f7f836fbec087746d853150ffc0 + uses: pytorch/test-infra/.github/workflows/build_wheels_linux.yml@28a1b658404f17c8eabde5f7fe25ae3ac826fae6 strategy: fail-fast: false with: repository: meta-pytorch/forge ref: "" test-infra-repository: pytorch/test-infra - test-infra-ref: ec36a78be63f4f7f836fbec087746d853150ffc0 + test-infra-ref: 28a1b658404f17c8eabde5f7fe25ae3ac826fae6 run-smoke-test: false package-name: forge build-matrix: | diff --git a/.github/workflows/build_wheels.yaml b/.github/workflows/build_wheels.yaml index ef872ab9b..230441b2d 100644 --- a/.github/workflows/build_wheels.yaml +++ b/.github/workflows/build_wheels.yaml @@ -15,14 +15,14 @@ jobs: build: # if: github.repository_owner == 'pytorch' name: forge-cu129-nightly - uses: pytorch/test-infra/.github/workflows/build_wheels_linux.yml@ec36a78be63f4f7f836fbec087746d853150ffc0 + uses: pytorch/test-infra/.github/workflows/build_wheels_linux.yml@28a1b658404f17c8eabde5f7fe25ae3ac826fae6 strategy: fail-fast: false with: repository: meta-pytorch/forge ref: "" test-infra-repository: pytorch/test-infra - test-infra-ref: ec36a78be63f4f7f836fbec087746d853150ffc0 + test-infra-ref: 28a1b658404f17c8eabde5f7fe25ae3ac826fae6 run-smoke-test: false package-name: forge build-matrix: | From 5c5a1fa6081208ab0e5b285cf7fb740f42c6c067 Mon Sep 17 00:00:00 2001 From: Evan Smothers Date: Fri, 3 Oct 2025 12:58:35 -0700 Subject: [PATCH 12/17] [not for land] changes for faster debugging --- .github/packaging/post_build_script.sh | 3 ++- .github/packaging/pre_build_cpu.sh | 12 +++++++++++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/.github/packaging/post_build_script.sh b/.github/packaging/post_build_script.sh index 0e9c8decf..5cfd846bd 100644 --- a/.github/packaging/post_build_script.sh +++ b/.github/packaging/post_build_script.sh @@ -10,4 +10,5 @@ ls -l "${WHL_DIR}" ls ${FORGE_WHEEL} echo "Copying files from $WHL_DIR to $DIST" mkdir -p $DIST && cp -r $WHL_DIR $DIST -ls -l "${DIST}" \ No newline at end of file +ls -l "${DIST}" +ls -l "${DIST}/wheels" \ No newline at end of file diff --git a/.github/packaging/pre_build_cpu.sh b/.github/packaging/pre_build_cpu.sh index 4bc843701..fbebdcdb4 100644 --- a/.github/packaging/pre_build_cpu.sh +++ b/.github/packaging/pre_build_cpu.sh @@ -30,5 +30,15 @@ build_vllm() { pip wheel -v --no-build-isolation --no-deps . -w "$WHL_DIR" } +build_debug() { + cd "$BUILD_DIR" + + git clone https://github.com/meta-pytorch/torchtune.git + cd "$BUILD_DIR/torchtune" + + pip install -r requirements/build.txt + pip wheel -v --no-build-isolation --no-deps . -w "$WHL_DIR" +} -build_vllm \ No newline at end of file +build_debug +# build_vllm \ No newline at end of file From 211b1e603b4f4fa708d5a7b72d9d06ddd9657753 Mon Sep 17 00:00:00 2001 From: Evan Smothers Date: Fri, 3 Oct 2025 13:10:33 -0700 Subject: [PATCH 13/17] idk --- .github/packaging/post_build_script.sh | 5 ++--- .github/packaging/pre_build_cpu.sh | 1 - 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/packaging/post_build_script.sh b/.github/packaging/post_build_script.sh index 5cfd846bd..95eb87858 100644 --- a/.github/packaging/post_build_script.sh +++ b/.github/packaging/post_build_script.sh @@ -2,13 +2,12 @@ set -euxo pipefail FORGE_WHEEL=${GITHUB_WORKSPACE}/${REPOSITORY}/dist/*.whl -WHL_DIR="${GITHUB_WORKSPACE}/wheels/" +WHL_DIR="${GITHUB_WORKSPACE}/wheels/dist" DIST=dist/ echo "Uploading wheels to S3" ls -l "${WHL_DIR}" ls ${FORGE_WHEEL} echo "Copying files from $WHL_DIR to $DIST" -mkdir -p $DIST && cp -r $WHL_DIR $DIST +mkdir -p $DIST && rsync -r --exclude="/*/" $WHL_DIR/ $DIST/ ls -l "${DIST}" -ls -l "${DIST}/wheels" \ No newline at end of file diff --git a/.github/packaging/pre_build_cpu.sh b/.github/packaging/pre_build_cpu.sh index fbebdcdb4..8689cfbec 100644 --- a/.github/packaging/pre_build_cpu.sh +++ b/.github/packaging/pre_build_cpu.sh @@ -36,7 +36,6 @@ build_debug() { git clone https://github.com/meta-pytorch/torchtune.git cd "$BUILD_DIR/torchtune" - pip install -r requirements/build.txt pip wheel -v --no-build-isolation --no-deps . -w "$WHL_DIR" } From ecc1dc05e8b0191a3b3b7066510e258f3aa7d0a2 Mon Sep 17 00:00:00 2001 From: Evan Smothers Date: Fri, 3 Oct 2025 13:27:45 -0700 Subject: [PATCH 14/17] no rsync --- .github/packaging/post_build_script.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/packaging/post_build_script.sh b/.github/packaging/post_build_script.sh index 95eb87858..5b721c22f 100644 --- a/.github/packaging/post_build_script.sh +++ b/.github/packaging/post_build_script.sh @@ -9,5 +9,5 @@ echo "Uploading wheels to S3" ls -l "${WHL_DIR}" ls ${FORGE_WHEEL} echo "Copying files from $WHL_DIR to $DIST" -mkdir -p $DIST && rsync -r --exclude="/*/" $WHL_DIR/ $DIST/ +mkdir -p $DIST && find "$WHL_DIR" -maxdepth 1 -type f -exec cp {} "$DIST/" \; ls -l "${DIST}" From 65bec8eb24aae413300eaf686a1d8b7b3aa991c2 Mon Sep 17 00:00:00 2001 From: Evan Smothers Date: Fri, 3 Oct 2025 14:16:03 -0700 Subject: [PATCH 15/17] final changes --- .github/packaging/pre_build_cpu.sh | 12 +------ .github/packaging/pre_build_gpu.sh | 50 ++--------------------------- .github/workflows/build_vllm.yaml | 1 + .github/workflows/build_wheels.yaml | 1 + 4 files changed, 5 insertions(+), 59 deletions(-) diff --git a/.github/packaging/pre_build_cpu.sh b/.github/packaging/pre_build_cpu.sh index 8689cfbec..b0568ec58 100644 --- a/.github/packaging/pre_build_cpu.sh +++ b/.github/packaging/pre_build_cpu.sh @@ -30,14 +30,4 @@ build_vllm() { pip wheel -v --no-build-isolation --no-deps . -w "$WHL_DIR" } -build_debug() { - cd "$BUILD_DIR" - - git clone https://github.com/meta-pytorch/torchtune.git - cd "$BUILD_DIR/torchtune" - - pip wheel -v --no-build-isolation --no-deps . -w "$WHL_DIR" -} - -build_debug -# build_vllm \ No newline at end of file +build_vllm \ No newline at end of file diff --git a/.github/packaging/pre_build_gpu.sh b/.github/packaging/pre_build_gpu.sh index 9ae2b8ff9..d81f52782 100644 --- a/.github/packaging/pre_build_gpu.sh +++ b/.github/packaging/pre_build_gpu.sh @@ -1,13 +1,10 @@ #!/bin/bash set -euxo pipefail -# Builds vLLM, Monarch and torchstore -# This script builds vLLM, Monarch and torchstore and places -# their wheels into dist/. +# Builds Monarch +# This script builds Monarch and places its wheel into dist/. MONARCH_COMMIT="265034a29ec3fb35919f4a9c23c65f2f4237190d" -TORCHTITAN_COMMIT="82f0287b966f1735819a377a9a09e7a303c55faa" -TORCHSTORE_COMMIT="main" BUILD_DIR="$HOME/forge-build" # Push other files to the dist folder @@ -18,23 +15,6 @@ mkdir -p $WHL_DIR echo "build dir is $BUILD_DIR" echo "wheel dir is $WHL_DIR" -build_vllm() { - cd "$BUILD_DIR" - - git clone https://github.com/vllm-project/vllm.git --branch $VLLM_BRANCH - cd "$BUILD_DIR/vllm" - - python use_existing_torch.py - pip install -r requirements/build.txt - export VERBOSE=1 - export CMAKE_VERBOSE_MAKEFILE=1 - export FORCE_CMAKE=1 - # export MAX_JOBS=2 # don't resource starve the host - # export CMAKE_BUILD_PARALLEL_LEVEL=2 - # export MAKEFLAGS=-j2 - pip wheel -v --no-build-isolation --no-deps . -w "$WHL_DIR" -} - build_monarch() { # Get Rust build related pieces if ! command -v rustup &> /dev/null; then @@ -70,30 +50,6 @@ build_monarch() { pip wheel --no-build-isolation --no-deps . -w "$WHL_DIR" } -build_torchtitan() { - cd "$BUILD_DIR" - git clone https://github.com/pytorch/torchtitan.git - cd "$BUILD_DIR/torchtitan" - git checkout $TORCHTITAN_COMMIT - - pip wheel --no-deps . -w "$WHL_DIR" -} - -build_torchstore() { - cd "$BUILD_DIR" - if [ -d "torchstore" ]; then - log_warn "torchstore directory exists, removing..." - rm -rf torchstore - fi - - git clone https://github.com/meta-pytorch/torchstore.git - cd "$BUILD_DIR/torchstore" - git checkout $TORCHSTORE_COMMIT - - pip wheel --no-deps . -w "$WHL_DIR" -} - - append_date() { cd ${GITHUB_WORKSPACE}/${REPOSITORY} # Appends the current date and time to the Forge wheel @@ -112,6 +68,4 @@ append_date() { build_monarch -# build_torchstore -# build_torchtitan append_date \ No newline at end of file diff --git a/.github/workflows/build_vllm.yaml b/.github/workflows/build_vllm.yaml index b5b6535f5..f3f761853 100644 --- a/.github/workflows/build_vllm.yaml +++ b/.github/workflows/build_vllm.yaml @@ -24,6 +24,7 @@ jobs: test-infra-repository: pytorch/test-infra test-infra-ref: 28a1b658404f17c8eabde5f7fe25ae3ac826fae6 run-smoke-test: false + wheel-upload-path: preview/forge package-name: forge build-matrix: | { diff --git a/.github/workflows/build_wheels.yaml b/.github/workflows/build_wheels.yaml index 230441b2d..8ba7989bd 100644 --- a/.github/workflows/build_wheels.yaml +++ b/.github/workflows/build_wheels.yaml @@ -24,6 +24,7 @@ jobs: test-infra-repository: pytorch/test-infra test-infra-ref: 28a1b658404f17c8eabde5f7fe25ae3ac826fae6 run-smoke-test: false + wheel-upload-path: preview/forge package-name: forge build-matrix: | { From 10acfef45a5bdd6c49107d2bf1906179cf10fa9a Mon Sep 17 00:00:00 2001 From: Evan Smothers Date: Fri, 3 Oct 2025 14:19:12 -0700 Subject: [PATCH 16/17] correct comment --- .github/packaging/pre_build_cpu.sh | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/packaging/pre_build_cpu.sh b/.github/packaging/pre_build_cpu.sh index b0568ec58..520bdedb1 100644 --- a/.github/packaging/pre_build_cpu.sh +++ b/.github/packaging/pre_build_cpu.sh @@ -1,9 +1,8 @@ #!/bin/bash set -euxo pipefail -# Builds vLLM, Monarch and torchstore -# This script builds vLLM, Monarch and torchstore and places -# their wheels into dist/. +# Builds vLLM +# This script builds vLLM and places its wheel into dist/. VLLM_BRANCH="v0.10.0" BUILD_DIR="$HOME/forge-build" From 2802de7593813df64608b28acf16c8f07a85e01e Mon Sep 17 00:00:00 2001 From: Evan Smothers Date: Fri, 3 Oct 2025 14:31:46 -0700 Subject: [PATCH 17/17] address comments --- .github/packaging/post_build_script.sh | 2 +- .github/workflows/build_vllm.yaml | 2 -- .github/workflows/build_wheels.yaml | 2 -- 3 files changed, 1 insertion(+), 5 deletions(-) diff --git a/.github/packaging/post_build_script.sh b/.github/packaging/post_build_script.sh index 5b721c22f..a4e2fc888 100644 --- a/.github/packaging/post_build_script.sh +++ b/.github/packaging/post_build_script.sh @@ -5,9 +5,9 @@ FORGE_WHEEL=${GITHUB_WORKSPACE}/${REPOSITORY}/dist/*.whl WHL_DIR="${GITHUB_WORKSPACE}/wheels/dist" DIST=dist/ -echo "Uploading wheels to S3" ls -l "${WHL_DIR}" ls ${FORGE_WHEEL} echo "Copying files from $WHL_DIR to $DIST" mkdir -p $DIST && find "$WHL_DIR" -maxdepth 1 -type f -exec cp {} "$DIST/" \; +echo "The following wheels will be uploaded to S3" ls -l "${DIST}" diff --git a/.github/workflows/build_vllm.yaml b/.github/workflows/build_vllm.yaml index f3f761853..6938e16a6 100644 --- a/.github/workflows/build_vllm.yaml +++ b/.github/workflows/build_vllm.yaml @@ -1,7 +1,6 @@ name: Build pinned vLLM against PyTorch nightly and upload on: - pull_request: push: branches: - nightly @@ -13,7 +12,6 @@ permissions: jobs: build: - # if: github.repository_owner == 'pytorch' name: forge-cu129-nightly uses: pytorch/test-infra/.github/workflows/build_wheels_linux.yml@28a1b658404f17c8eabde5f7fe25ae3ac826fae6 strategy: diff --git a/.github/workflows/build_wheels.yaml b/.github/workflows/build_wheels.yaml index 8ba7989bd..22c183314 100644 --- a/.github/workflows/build_wheels.yaml +++ b/.github/workflows/build_wheels.yaml @@ -1,7 +1,6 @@ name: Build nightly wheels and publish to PyTorch Index on: - pull_request: push: branches: - nightly @@ -13,7 +12,6 @@ permissions: jobs: build: - # if: github.repository_owner == 'pytorch' name: forge-cu129-nightly uses: pytorch/test-infra/.github/workflows/build_wheels_linux.yml@28a1b658404f17c8eabde5f7fe25ae3ac826fae6 strategy: