diff --git a/.github/workflows/build_wheels.yaml b/.github/workflows/build_wheels.yaml index a384991cd..cb4fb08c6 100644 --- a/.github/workflows/build_wheels.yaml +++ b/.github/workflows/build_wheels.yaml @@ -29,18 +29,18 @@ jobs: { "include": [ { - "python_version": "3.10", + "python_version": "3.12", "gpu_arch_type": "cuda", - "gpu_arch_version": "12.9", - "desired_cuda": "cu129", - "container_image": "pytorch/manylinux2_28-builder:cuda12.9", + "gpu_arch_version": "12.8", + "desired_cuda": "cu128", + "container_image": "pytorch/manylinux2_28-builder:cuda12.8", "package_type": "manywheel", - "build_name": "manywheel-py3_10-cuda12_9", + "build_name": "manywheel-py3_12-cuda12_8", "validation_runner": "linux.4xlarge.nvidia.gpu", - "installation": "pip3 install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu129", + "installation": "pip3 install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu128", "channel": "nightly", "upload_to_base_bucket": "no", - "stable_version": "2.8.0", + "stable_version": "2.9.0", "use_split_build": false } ] diff --git a/.github/workflows/gpu_test.yaml b/.github/workflows/gpu_test.yaml index c2a4705e5..54a4ae45c 100644 --- a/.github/workflows/gpu_test.yaml +++ b/.github/workflows/gpu_test.yaml @@ -40,21 +40,23 @@ jobs: python-version: ${{ matrix.python-version }} - name: Update pip run: python -m pip install --upgrade pip - - name: Install pinned torch nightly - run: python -m pip install --pre torch==2.9.0.dev20250905 --no-cache-dir --index-url https://download.pytorch.org/whl/nightly/cu129 - - name: Download and install vLLM and its dependencies - # TODO: this honestly could not be hackier if I tried - run: | - python -m pip install -r .github/packaging/vllm_reqs.txt - python -m pip install vllm==0.10.1.dev0+g6d8d0a24c.d20251009.cu129 --no-cache-dir --index-url https://download.pytorch.org/whl/preview/forge - - name: Install Monarch - run: pip install torchmonarch==0.1.0rc1 - - 
name: Install torchtitan and torchstore - run: | - python -m pip install git+https://github.com/pytorch/torchtitan.git - python -m pip install git+https://github.com/meta-pytorch/torchstore.git - - name: Install dependencies - run: python -m pip install --no-build-isolation -e ".[dev]" + - name: Install dependencies & forge package + run: ./scripts/install.sh --dev + # - name: Install pinned torch nightly + # run: python -m pip install --pre torch==2.9.0.dev20250905 --no-cache-dir --index-url https://download.pytorch.org/whl/nightly/cu129 + # - name: Download and install vLLM and its dependencies + # # TODO: this honestly could not be hackier if I tried + # run: | + # python -m pip install -r .github/packaging/vllm_reqs.txt + # python -m pip install vllm==0.10.1.dev0+g6d8d0a24c.d20251009.cu129 --no-cache-dir --index-url https://download.pytorch.org/whl/preview/forge + # - name: Install Monarch + # run: pip install torchmonarch==0.1.0rc1 + # - name: Install torchtitan and torchstore + # run: | + # python -m pip install git+https://github.com/pytorch/torchtitan.git + # python -m pip install git+https://github.com/meta-pytorch/torchstore.git + # - name: Install dependencies + # run: python -m pip install --no-build-isolation -e ".[dev]" - name: Run unit tests with coverage # TODO add all tests run: | diff --git a/assets/versions.sh b/assets/versions.sh index 7c188b0d5..5aa450ead 100644 --- a/assets/versions.sh +++ b/assets/versions.sh @@ -5,15 +5,10 @@ # LICENSE file in the root directory of this source tree. 
# Version Configuration for Forge Wheel Building -# This file contains all pinned versions and commits for dependencies +# This file contains all pinned versions/tags/commits for dependencies -# PyTorch version -PYTORCH_VERSION="2.9.0.dev20250905" - -# vLLM branch -VLLM_BRANCH="v0.10.0" - -# Commit hashes -MONARCH_COMMIT="195503223b5c2896846171f60ac99dc6868f8f2c" -TORCHTITAN_COMMIT="0cfbd0b3c2d827af629a107a77a9e47229c31663" -TORCHSTORE_COMMIT="662299faf4fd50ee30bd9aa3f4ce8c0e2db1d310" +PYTORCH_VERSION="2.9.0" +VLLM_VERSION="v0.10.0" +MONARCH_VERSION="0.1.0rc3" +TORCHTITAN_VERSION="0.1.0.dev20251015" +TORCHSTORE_VERSION="662299faf4fd50ee30bd9aa3f4ce8c0e2db1d310" diff --git a/scripts/build_wheels.sh b/scripts/build_wheels.sh index 407d17e6f..f2652954f 100755 --- a/scripts/build_wheels.sh +++ b/scripts/build_wheels.sh @@ -138,26 +138,26 @@ setup_build_dir() { setup_cuda_env() { log_info "Setting up CUDA environment..." - export CUDA_VERSION=12.9 + export CUDA_VERSION=12.8 export NVCC=/usr/local/cuda-${CUDA_VERSION}/bin/nvcc export CUDA_NVCC_EXECUTABLE=/usr/local/cuda-${CUDA_VERSION}/bin/nvcc export CUDA_HOME=/usr/local/cuda-${CUDA_VERSION} export PATH="${CUDA_HOME}/bin:$PATH" export CUDA_INCLUDE_DIRS=$CUDA_HOME/include export CUDA_CUDART_LIBRARY=$CUDA_HOME/lib64/libcudart.so - export LD_LIBRARY_PATH=/usr/local/cuda-12.9/compat:${LD_LIBRARY_PATH:-} + export LD_LIBRARY_PATH=/usr/local/cuda-12.8/compat:${LD_LIBRARY_PATH:-} export LIBRARY_PATH=$CUDA_HOME/lib64:${LIBRARY_PATH:-} # Save to file for persistence cat > ~/.forge_cuda_env << 'EOF' -export CUDA_VERSION=12.9 +export CUDA_VERSION=12.8 export NVCC=/usr/local/cuda-${CUDA_VERSION}/bin/nvcc export CUDA_NVCC_EXECUTABLE=/usr/local/cuda-${CUDA_VERSION}/bin/nvcc export CUDA_HOME=/usr/local/cuda-${CUDA_VERSION} export PATH="${CUDA_HOME}/bin:$PATH" export CUDA_INCLUDE_DIRS=$CUDA_HOME/include export CUDA_CUDART_LIBRARY=$CUDA_HOME/lib64/libcudart.so -export LD_LIBRARY_PATH=/usr/local/cuda-12.9/compat:${LD_LIBRARY_PATH:-} 
+export LD_LIBRARY_PATH=/usr/local/cuda-12.8/compat:${LD_LIBRARY_PATH:-} export LIBRARY_PATH=${CUDA_HOME}/lib64:${LIBRARY_PATH:-} EOF @@ -231,7 +231,7 @@ run_step() { # Step 1: Install PyTorch nightly step1_pytorch() { - pip3 install --pre torch==$PYTORCH_VERSION --index-url https://download.pytorch.org/whl/nightly/cu129 + pip3 install --pre torch==$PYTORCH_VERSION --index-url https://download.pytorch.org/whl/nightly/cu128 } # Step 2: Install CUDA system packages @@ -388,7 +388,7 @@ main() { log_info "Users can now install with:" log_info " conda create -n forge python=3.10 -y" log_info " conda activate forge" - log_info " pip install torch==$PYTORCH_VERSION --index-url https://download.pytorch.org/whl/nightly/cu129" + log_info " pip install torch==$PYTORCH_VERSION --index-url https://download.pytorch.org/whl/nightly/cu128" log_info " pip install $WHEEL_DIR/*.whl" if should_build "vllm" || should_build "monarch"; then log_info " source ~/.forge_cuda_env" diff --git a/scripts/install.sh b/scripts/install.sh index 30dcf1ff5..bf744f8c9 100755 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -35,7 +35,7 @@ if [ -z "${PYTORCH_VERSION:-}" ]; then fi WHEEL_DIR="$SCRIPT_DIR/../assets/wheels" -RELEASE_TAG="v0.0.0-93025" +RELEASE_TAG="v0.0.0-93025" # TODO: hardcoded release tag; move it to assets/versions.sh with the other pins, or drop it if no longer used GITHUB_REPO="meta-pytorch/forge" # Check conda environment @@ -161,57 +161,91 @@ check_wheels() { log_info "Found $wheel_count local wheels" } -# Download vLLM wheel from GitHub releases -download_vllm_wheel() { - log_info "Downloading vLLM wheel from GitHub releases..." +# Generic package installation function supporting multiple sources +# Args: package_name, version, github_repo +install_package() { + local package_name="$1" + local version="$2" + local github_repo="$3" + + log_info "Installing $package_name..." 
+ + # Determine installation method based on version format + if [[ "$version" =~ ^v[0-9]+\.[0-9]+\.[0-9]+ ]]; then + # GitHub release tag format (e.g., v0.10.0) + log_info " Method: GitHub release tag ($version)" + install_from_github_release "$package_name" "$version" "$github_repo" + elif [[ "$version" =~ ^[0-9a-f]{40}$ ]]; then + # GitHub commit SHA format (40-character hex string) + log_info " Method: GitHub commit SHA ($version)" + install_from_github_commit "$package_name" "$version" "$github_repo" + else + # PyPI or PyTorch index format (e.g., 0.1.0, 2.9.0, 0.1.0.dev20251015) + log_info " Method: PyPI/PyTorch index ($version)" + install_from_index "$package_name" "$version" + fi +} + +# Install from PyPI or PyTorch index +install_from_index() { + local package_name="$1" + local version="$2" + + if [[ "$version" == *"dev"* ]]; then + pip install --pre "${package_name}==${version}" --extra-index-url https://download.pytorch.org/whl/nightly/cu128 + else + pip install "${package_name}==${version}" + fi + + if [ $? -ne 0 ]; then + log_error "Failed to install $package_name from index" + exit 1 + fi +} + +# Install from GitHub release tag +install_from_github_release() { + local package_name="$1" + local release_tag="$2" + local github_repo="$3" # Check if gh is installed if ! 
command -v gh &> /dev/null; then - log_error "GitHub CLI (gh) is required to download vLLM wheel" - log_info "Install it with: sudo dnf install gh" - log_info "Then run: gh auth login" + log_error "GitHub CLI (gh) is required to download from GitHub releases" + log_info "Run the installation script - it will install gh via conda" exit 1 fi - # Get the vLLM wheel filename from the release - local vllm_wheel_name - vllm_wheel_name=$(gh release view "$RELEASE_TAG" --repo "$GITHUB_REPO" --json assets --jq '.assets[] | select(.name | contains("vllm")) | .name' | head -1) + # Get the wheel URL from the release + local wheel_url + wheel_url=$(gh release view "$release_tag" --repo "$github_repo" --json assets --jq ".assets[] | select(.name | contains(\"$package_name\")) | .url" | head -1) - if [ -z "$vllm_wheel_name" ]; then - log_error "Could not find vLLM wheel in release $RELEASE_TAG" - log_info "Make sure the vLLM wheel has been uploaded to the GitHub release" + if [ -z "$wheel_url" ]; then + log_error "Could not find $package_name wheel in release $release_tag" + log_info "Make sure the $package_name wheel has been uploaded to the GitHub release" exit 1 fi - for f in assets/wheels/vllm-*; do - [ -e "$f" ] || continue # skip if glob didn't match - if [ "$(basename "$f")" != "$vllm_wheel_name" ]; then - log_info "Removing stale vLLM wheel: $(basename "$f")" - rm -f "$f" - fi - done - local local_path="$WHEEL_DIR/$vllm_wheel_name" + log_info " Installing from: $wheel_url" + pip install "$wheel_url" - if [ -f "$local_path" ]; then - log_info "vLLM wheel already downloaded: $vllm_wheel_name" - return 0 + if [ $? -ne 0 ]; then + log_error "Failed to install $package_name from GitHub release" + exit 1 fi +} - log_info "Downloading: $vllm_wheel_name" - - # Save current directory and change to wheel directory - local original_dir=$(pwd) - cd "$WHEEL_DIR" - gh release download "$RELEASE_TAG" --repo "$GITHUB_REPO" --pattern "*vllm*" - local download_result=$? 
+# Install from GitHub commit SHA +install_from_github_commit() { + local package_name="$1" + local commit_sha="$2" + local github_repo="$3" - # Always return to original directory - cd "$original_dir" + log_info " Installing from: git+https://github.com/$github_repo.git@$commit_sha" + pip install "git+https://github.com/$github_repo.git@$commit_sha" - if [ $download_result -eq 0 ]; then - log_info "Successfully downloaded vLLM wheel" - else - log_error "Failed to download vLLM wheel" + if [ $? -ne 0 ]; then + log_error "Failed to install $package_name from GitHub commit" exit 1 fi } @@ -265,28 +299,30 @@ main() { echo "" check_conda_env - check_wheels + # check_wheels # Install openssl as we overwrite the default version when we update LD_LIBRARY_PATH conda install -y openssl install_system_packages "$USE_SUDO" check_gh_install - download_vllm_wheel - - log_info "Installing PyTorch nightly..." - pip install torch==$PYTORCH_VERSION --index-url https://download.pytorch.org/whl/nightly/cu129 - log_info "Installing all wheels (local + downloaded)..." - pip install "$WHEEL_DIR"/*.whl + # Install all packages using the generic install_package function + # Syntax: install_package "package_name" "version" "github_repo" + install_package "torch" "$PYTORCH_VERSION" "pytorch/pytorch" + install_package "vllm" "$VLLM_VERSION" "vllm-project/vllm" + install_package "torchmonarch" "$MONARCH_VERSION" "meta-pytorch/monarch" + install_package "torchstore" "$TORCHSTORE_VERSION" "meta-pytorch/torchstore" + install_package "torchtitan" "$TORCHTITAN_VERSION" "pytorch/torchtitan" + install_package "torch" "$PYTORCH_VERSION" "pytorch/pytorch" - log_info "Installing Forge from source..." + log_info "Installing TorchForge from source..." pip install -e . # Set up environment log_info "Setting up environment..." 
- # Get conda environment directory + # Get conda environment directory local conda_env_dir="${CONDA_PREFIX}" if [ -z "$conda_env_dir" ]; then @@ -301,7 +337,7 @@ main() { local cuda_activation_script="${conda_env_dir}/etc/conda/activate.d/cuda_env.sh" cat > "$cuda_activation_script" << 'EOF' # CUDA environment for Forge -export CUDA_VERSION=12.9 +export CUDA_VERSION=12.8 export NVCC=/usr/local/cuda-${CUDA_VERSION}/bin/nvcc export CUDA_NVCC_EXECUTABLE=/usr/local/cuda-${CUDA_VERSION}/bin/nvcc export CUDA_HOME=/usr/local/cuda-${CUDA_VERSION}