diff --git a/.github/packaging/export_channel_test.sh b/.github/packaging/export_channel_test.sh
new file mode 100644
index 000000000..8518cee97
--- /dev/null
+++ b/.github/packaging/export_channel_test.sh
@@ -0,0 +1 @@
+CHANNEL="test"
diff --git a/.github/packaging/vllm_reqs.txt b/.github/packaging/vllm_reqs.txt
index aad2b28bd..c7d38ec64 100644
--- a/.github/packaging/vllm_reqs.txt
+++ b/.github/packaging/vllm_reqs.txt
@@ -7,20 +7,20 @@
 # See the file .github/workflows/gpu_test.yaml for an E2E forge installation using this approach.
 # TODO: this should be done way less hackily
 aiohappyeyeballs==2.6.1
-aiohttp==3.13.0
+aiohttp==3.13.1
 aiosignal==1.4.0
 annotated-types==0.7.0
 anyio==4.11.0
 astor==0.8.1
 async-timeout==5.0.1
 attrs==25.4.0
-blake3==1.0.7
-cachetools==6.2.0
+blake3==1.0.8
+cachetools==6.2.1
 cbor2==5.7.0
 certifi==2025.10.5
 cffi==2.0.0
-charset-normalizer==3.4.3
-click==8.3.0
+charset-normalizer==3.4.4
+click==8.2.1
 cloudpickle==3.1.1
 cmake==4.1.0
 compressed-tensors==0.10.2
@@ -33,7 +33,7 @@ dnspython==2.8.0
 einops==0.8.1
 email-validator==2.3.0
 exceptiongroup==1.3.0
-fastapi==0.118.3
+fastapi==0.119.0
 fastapi-cli==0.0.13
 fastapi-cloud-cli==0.3.1
 fastrlock==0.8.3
@@ -47,10 +47,10 @@ httpcore==1.0.9
 httptools==0.7.1
 httpx==0.28.1
 huggingface-hub==0.35.3
-idna==3.10
+idna==3.11
 interegular==0.3.3
 Jinja2==3.1.6
-jiter==0.11.0
+jiter==0.11.1
 jsonschema==4.25.1
 jsonschema-specifications==2025.9.1
 lark==1.2.2
@@ -58,70 +58,69 @@ llguidance==0.7.30
 llvmlite==0.44.0
 lm-format-enforcer==0.10.12
 markdown-it-py==4.0.0
-MarkupSafe==3.0.2
+MarkupSafe==2.1.5
 mdurl==0.1.2
 mistral_common==1.8.5
 mpmath==1.3.0
 msgpack==1.1.2
 msgspec==0.19.0
 multidict==6.7.0
-networkx==3.4.2
+networkx==3.3
 ninja==1.13.0
 numba==0.61.2
 numpy==2.2.6
-nvidia-cublas-cu12==12.9.1.4
-nvidia-cuda-cupti-cu12==12.9.79
-nvidia-cuda-nvrtc-cu12==12.9.86
-nvidia-cuda-runtime-cu12==12.9.79
+nvidia-cublas-cu12==12.8.4.1
+nvidia-cuda-cupti-cu12==12.8.90
+nvidia-cuda-nvrtc-cu12==12.8.93
+nvidia-cuda-runtime-cu12==12.8.90
 nvidia-cudnn-cu12==9.10.2.21
-nvidia-cufft-cu12==11.4.1.4
-nvidia-cufile-cu12==1.14.1.1
-nvidia-curand-cu12==10.3.10.19
-nvidia-cusolver-cu12==11.7.5.82
-nvidia-cusparse-cu12==12.5.10.65
+nvidia-cufft-cu12==11.3.3.83
+nvidia-cufile-cu12==1.13.1.3
+nvidia-curand-cu12==10.3.9.90
+nvidia-cusolver-cu12==11.7.3.90
+nvidia-cusparse-cu12==12.5.8.93
 nvidia-cusparselt-cu12==0.7.1
 nvidia-nccl-cu12==2.27.5
-nvidia-nvjitlink-cu12==12.9.86
+nvidia-nvjitlink-cu12==12.8.93
 nvidia-nvshmem-cu12==3.3.20
-nvidia-nvtx-cu12==12.9.79
+nvidia-nvtx-cu12==12.8.90
 openai==1.90.0
 opencv-python-headless==4.12.0.88
 outlines_core==0.2.10
 packaging==25.0
 partial-json-parser==0.2.1.1.post6
-pillow==11.3.0
+pillow==12.0.0
 prometheus-fastapi-instrumentator==7.1.0
 prometheus_client==0.23.1
 propcache==0.4.1
-protobuf==6.32.1
+protobuf==6.33.0
 psutil==7.1.0
 py-cpuinfo==9.0.0
 pybase64==1.4.2
 pycountry==24.6.1
 pycparser==2.23
-pydantic==2.12.0
+pydantic==2.12.3
 pydantic-extra-types==2.10.6
-pydantic_core==2.41.1
+pydantic_core==2.41.4
 Pygments==2.19.2
 python-dotenv==1.1.1
 python-json-logger==4.0.0
 python-multipart==0.0.20
-pytorch-triton==3.4.0+gitf7888497
 PyYAML==6.0.3
 pyzmq==27.1.0
-ray==2.49.2
-referencing==0.36.2
+ray==2.50.0
+referencing==0.37.0
 regex==2025.9.18
 requests==2.32.5
 rich==14.2.0
 rich-toolkit==0.15.1
-rignore==0.7.0
+rignore==0.7.1
 rpds-py==0.27.1
 safetensors==0.6.2
 scipy==1.15.3
 sentencepiece==0.2.1
-sentry-sdk==2.41.0
-setuptools-scm==9.2.0
+sentry-sdk==2.42.0
+setuptools-scm==9.2.1
 shellingham==1.5.4
 sniffio==1.3.1
 soundfile==0.13.1
@@ -131,17 +130,17 @@ sympy==1.14.0
 tiktoken==0.12.0
 tokenizers==0.22.1
 tomli==2.3.0
-torch==2.9.0.dev20250905+cu129
+torch==2.9.0+cu128
 tqdm==4.67.1
-transformers==4.57.0
-triton==3.4.0
+transformers==4.57.1
+triton==3.5.0
 typer==0.19.2
 typing-inspection==0.4.2
 typing_extensions==4.15.0
 urllib3==2.5.0
 uvicorn==0.37.0
-uvloop==0.21.0
-watchfiles==1.1.0
+uvloop==0.22.1
+watchfiles==1.1.1
 websockets==15.0.1
 xgrammar==0.1.21
 yarl==1.22.0
diff --git a/.github/workflows/build_vllm.yaml b/.github/workflows/build_vllm.yaml
index 0e8279ac4..0748b937a 100644
--- a/.github/workflows/build_vllm.yaml
+++ b/.github/workflows/build_vllm.yaml
@@ -1,10 +1,8 @@
-name: Build pinned vLLM against PyTorch nightly and upload
+name: Build pinned vLLM against PyTorch stable and upload
 
 on:
-  push:
-    branches:
-      - nightly
   workflow_dispatch:
+  pull_request: # Remove this before landing
 
 permissions:
   id-token: write
@@ -12,18 +10,20 @@ permissions:
 
 jobs:
   build:
-    name: forge-cu129-nightly
-    uses: pytorch/test-infra/.github/workflows/build_wheels_linux.yml@main
+    name: forge-cu128
+    uses: pytorch/test-infra/.github/workflows/build_wheels_linux.yml@vllm-push
     strategy:
       fail-fast: false
     with:
       repository: meta-pytorch/forge
-      ref: ""
+      ref: main
       test-infra-repository: pytorch/test-infra
-      test-infra-ref: main
+      test-infra-ref: 'vllm-push'
       run-smoke-test: false
-      wheel-nightly-policy: gha_workflow_preview_build_wheels
-      wheel-upload-path: whl/preview/forge/
+      channel: test
+      # env-var-script: .github/packaging/export_channel_test.sh
+      # wheel-nightly-policy: gha_workflow_preview_build_wheels
+      # wheel-upload-path: whl/test/forge/
       package-name: forge
       build-matrix: |
         {
@@ -31,16 +31,16 @@ jobs:
             {
               "python_version": "3.10",
               "gpu_arch_type": "cpu",
-              "gpu_arch_version": "12.9",
-              "desired_cuda": "cu129",
-              "container_image": "pytorch/manylinux2_28-builder:cuda12.9",
+              "gpu_arch_version": "12.8",
+              "desired_cuda": "cu128",
+              "container_image": "pytorch/manylinux2_28-builder:cuda12.8",
               "package_type": "manywheel",
-              "build_name": "manywheel-py3_10-cuda12_9",
+              "build_name": "manywheel-py3_10-cuda12_8",
               "validation_runner": "linux.12xlarge.memory",
-              "installation": "pip3 install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu129",
-              "channel": "nightly",
+              "installation": "pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu128",
+              "channel": "test",
               "upload_to_base_bucket": "no",
-              "stable_version": "2.8.0",
+              "stable_version": "2.9.0",
               "use_split_build": false
             }
           ]
diff --git a/.github/workflows/build_vllm_nightly.yaml b/.github/workflows/build_vllm_nightly.yaml
new file mode 100644
index 000000000..3cddb274f
--- /dev/null
+++ b/.github/workflows/build_vllm_nightly.yaml
@@ -0,0 +1,52 @@
+name: Build pinned vLLM against PyTorch nightly and upload
+
+on:
+  push:
+    branches:
+      - nightly
+  workflow_dispatch:
+  pull_request: # Remove this before landing
+
+permissions:
+  id-token: write
+  contents: read
+
+jobs:
+  build:
+    name: forge-cu128-nightly
+    uses: pytorch/test-infra/.github/workflows/build_wheels_linux.yml@main
+    strategy:
+      fail-fast: false
+    with:
+      repository: meta-pytorch/forge
+      ref: ""
+      test-infra-repository: pytorch/test-infra
+      test-infra-ref: main
+      run-smoke-test: false
+      wheel-nightly-policy: gha_workflow_preview_build_wheels
+      wheel-upload-path: whl/preview/forge/
+      package-name: forge
+      build-matrix: |
+        {
+          "include": [
+            {
+              "python_version": "3.10",
+              "gpu_arch_type": "cpu",
+              "gpu_arch_version": "12.8",
+              "desired_cuda": "cu128",
+              "container_image": "pytorch/manylinux2_28-builder:cuda12.8",
+              "package_type": "manywheel",
+              "build_name": "manywheel-py3_10-cuda12_8",
+              "validation_runner": "linux.12xlarge.memory",
+              "installation": "pip3 install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu128",
+              "channel": "nightly",
+              "upload_to_base_bucket": "no",
+              "stable_version": "2.9.0",
+              "use_split_build": false
+            }
+          ]
+        }
+      pre-script: .github/packaging/pre_build_cpu.sh
+      post-script: .github/packaging/post_build_script.sh
+      trigger-event: ${{ github.event_name }}
+      build-platform: 'python-build-package'
diff --git a/.github/workflows/build_wheels.yaml b/.github/workflows/build_wheels.yaml
index a384991cd..1c1b6a6b8 100644
--- a/.github/workflows/build_wheels.yaml
+++ b/.github/workflows/build_wheels.yaml
@@ -12,7 +12,7 @@ permissions:
 
 jobs:
   build:
-    name: forge-cu129-nightly
+    name: forge-cu128-nightly
     uses: pytorch/test-infra/.github/workflows/build_wheels_linux.yml@main
     strategy:
       fail-fast: false
@@ -31,16 +31,16 @@ jobs:
             {
               "python_version": "3.10",
               "gpu_arch_type": "cuda",
-              "gpu_arch_version": "12.9",
-              "desired_cuda": "cu129",
-              "container_image": "pytorch/manylinux2_28-builder:cuda12.9",
+              "gpu_arch_version": "12.8",
+              "desired_cuda": "cu128",
+              "container_image": "pytorch/manylinux2_28-builder:cuda12.8",
               "package_type": "manywheel",
-              "build_name": "manywheel-py3_10-cuda12_9",
+              "build_name": "manywheel-py3_10-cuda12_8",
               "validation_runner": "linux.4xlarge.nvidia.gpu",
-              "installation": "pip3 install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu129",
+              "installation": "pip3 install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu128",
               "channel": "nightly",
               "upload_to_base_bucket": "no",
-              "stable_version": "2.8.0",
+              "stable_version": "2.9.0",
               "use_split_build": false
             }
           ]
diff --git a/.github/workflows/gpu_test.yaml b/.github/workflows/gpu_test.yaml
index c2a4705e5..2302a95ff 100644
--- a/.github/workflows/gpu_test.yaml
+++ b/.github/workflows/gpu_test.yaml
@@ -41,14 +41,14 @@ jobs:
       - name: Update pip
         run: python -m pip install --upgrade pip
       - name: Install pinned torch nightly
-        run: python -m pip install --pre torch==2.9.0.dev20250905 --no-cache-dir --index-url https://download.pytorch.org/whl/nightly/cu129
+        run: python -m pip install torch==2.9.0 --no-cache-dir --index-url https://download.pytorch.org/whl/cu128
       - name: Download and install vLLM and its dependencies
         # TODO: this honestly could not be hackier if I tried
         run: |
           python -m pip install -r .github/packaging/vllm_reqs.txt
-          python -m pip install vllm==0.10.1.dev0+g6d8d0a24c.d20251009.cu129 --no-cache-dir --index-url https://download.pytorch.org/whl/preview/forge
+          python -m pip install vllm==0.10.1.dev0+g6d8d0a24c.d20251009.cu128 --no-cache-dir --index-url https://download.pytorch.org/whl/preview/forge
       - name: Install Monarch
-        run: pip install torchmonarch==0.1.0rc1
+        run: pip install torchmonarch==0.1.0rc4
       - name: Install torchtitan and torchstore
         run: |
           python -m pip install git+https://github.com/pytorch/torchtitan.git
diff --git a/assets/versions.sh b/assets/versions.sh
index 49a755dc0..45485d7af 100644
--- a/assets/versions.sh
+++ b/assets/versions.sh
@@ -5,15 +5,10 @@
 # LICENSE file in the root directory of this source tree.
 
 # Version Configuration for Forge Wheel Building
-# This file contains all pinned versions and commits for dependencies
+# This file contains all pinned versions/tags/commits for dependencies
+PYTORCH_VERSION="2.9.0"
+MONARCH_VERSION="0.1.0rc4"
 
-# PyTorch version
-PYTORCH_VERSION="2.9.0.dev20250905"
-
-# vLLM branch
 VLLM_BRANCH="v0.10.0"
-
-# Commit hashes
-MONARCH_COMMIT="195503223b5c2896846171f60ac99dc6868f8f2c"
-TORCHTITAN_COMMIT="d0e25450bcac2332359b13fbda430dc701f073d4"
+TORCHTITAN_COMMIT="0cfbd0b3c2d827af629a107a77a9e47229c31663"
 TORCHSTORE_COMMIT="662299faf4fd50ee30bd9aa3f4ce8c0e2db1d310"
diff --git a/assets/wheels/monarch-0.0.1-cp310-cp310-linux_x86_64.whl b/assets/wheels/monarch-0.0.1-cp310-cp310-linux_x86_64.whl
deleted file mode 100644
index a704f8703..000000000
Binary files a/assets/wheels/monarch-0.0.1-cp310-cp310-linux_x86_64.whl and /dev/null differ
diff --git a/scripts/install.sh b/scripts/install.sh
index 30dcf1ff5..f1a09e1db 100755
--- a/scripts/install.sh
+++ b/scripts/install.sh
@@ -274,21 +274,24 @@ main() {
     check_gh_install
     download_vllm_wheel
 
-    log_info "Installing PyTorch nightly..."
-    pip install torch==$PYTORCH_VERSION --index-url https://download.pytorch.org/whl/nightly/cu129
+    log_info "Installing PyTorch..."
+    if [[ "$PYTORCH_VERSION" == *"dev"* ]]; then
+        pip install --pre torch==$PYTORCH_VERSION --index-url https://download.pytorch.org/whl/nightly/cu128
+    else
+        pip install torch==$PYTORCH_VERSION
+    fi
+
+    log_info "Installing Monarch ..."
+    pip install torchmonarch==$MONARCH_VERSION
 
-    log_info "Installing all wheels (local + downloaded)..."
+    log_info "Installing remaining wheels (local + downloaded)..."
     pip install "$WHEEL_DIR"/*.whl
 
     log_info "Installing Forge from source..."
     pip install -e .
 
-    # Set up environment
     log_info "Setting up environment..."
-
-    # Get conda environment directory
     local conda_env_dir="${CONDA_PREFIX}"
-
     if [ -z "$conda_env_dir" ]; then
         log_error "Could not determine conda environment directory"
         exit 1