# Install enroot for gpu unit tests (#305)
# Workflow file for this run.
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters.
# GPU unit-test workflow: installs enroot, builds a conda environment with the
# pinned torch / vLLM / Monarch stack, then runs tests/unit_tests under
# coverage and uploads the report to Codecov.
name: GPU Tests

on:
  schedule:
    # Runs at midnight (UTC) every day
    - cron: '0 0 * * *'
  push:
    branches: [main]
  pull_request:
  workflow_dispatch:

# On main the group key includes the run number, so every run gets its own
# group and is never cancelled. On any other ref the key is the ref itself, so
# a newer run for the same ref cancels the one still in progress.
concurrency:
  group: gpu-test-${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_number || github.ref }}
  cancel-in-progress: true

permissions:
  # NOTE(review): nothing in this workflow visibly requests an OIDC token;
  # presumably the self-hosted runner infrastructure needs it - confirm.
  id-token: write
  contents: read

defaults:
  run:
    # Login shell (-l) so the conda environment created by setup-miniconda is
    # activated in every `run` step; -e / pipefail make any failing command
    # (including one inside a pipeline) fail the step.
    shell: bash -l -eo pipefail {0}

jobs:
  gpu_test:
    # Skip on forks: only run in the upstream organisation.
    if: github.repository_owner == 'meta-pytorch'
    runs-on: linux.g5.12xlarge.nvidia.gpu
    strategy:
      matrix:
        # Quoted so YAML does not read 3.10 as the float 3.1.
        python-version: ['3.10']
    steps:
      - name: Check out repo
        uses: actions/checkout@v4

      - name: Install enroot
        run: |
          version="3.5.0"
          release_url="https://github.com/NVIDIA/enroot/releases/download/v${version}"
          enroot_rpm="enroot-${version}-1.el8.x86_64.rpm"
          enroot_caps_rpm="enroot+caps-${version}-1.el8.x86_64.rpm"
          # Install dependencies
          sudo dnf install -y jq squashfs-tools parallel
          # Download pre-built packages
          curl -fSsL -O "${release_url}/${enroot_rpm}"
          curl -fSsL -O "${release_url}/${enroot_caps_rpm}"
          # Install packages
          sudo rpm -ivh "${enroot_rpm}" "${enroot_caps_rpm}"
          rm -f enroot*.rpm
          # Configure enroot to use user-writable directories.
          # The heredoc delimiter is intentionally unquoted so ${HOME} is
          # expanded when the file is written.
          mkdir -p "${HOME}/.config/enroot"
          cat > "${HOME}/.config/enroot/enroot.conf" <<EOF
          ENROOT_RUNTIME_PATH ${HOME}/.local/share/enroot/runtime
          ENROOT_CACHE_PATH ${HOME}/.local/share/enroot/cache
          ENROOT_DATA_PATH ${HOME}/.local/share/enroot/data
          EOF
          # Create the directories
          mkdir -p "${HOME}"/.local/share/enroot/{runtime,cache,data}

      - name: Setup conda env
        uses: conda-incubator/setup-miniconda@v2
        with:
          auto-update-conda: true
          miniconda-version: "latest"
          activate-environment: test
          python-version: ${{ matrix.python-version }}

      - name: Update pip
        run: python -m pip install --upgrade pip

      - name: Install pinned torch nightly
        run: |
          python -m pip install --pre torch==2.9.0.dev20250905 --no-cache-dir \
            --index-url https://download.pytorch.org/whl/nightly/cu129

      - name: Download and install vLLM and its dependencies
        # TODO: this honestly could not be hackier if I tried
        run: |
          python -m pip install -r .github/packaging/vllm_reqs.txt
          python -m pip install vllm==0.10.1.dev0+g6d8d0a24c.d20251009.cu129 --no-cache-dir \
            --index-url https://download.pytorch.org/whl/preview/forge

      - name: Install Monarch
        # `python -m pip` (not bare `pip`) so the install targets the same
        # interpreter as every other step.
        run: python -m pip install torchmonarch==0.1.0rc1

      - name: Install torchtitan and torchstore
        run: |
          python -m pip install git+https://github.com/pytorch/torchtitan.git
          python -m pip install git+https://github.com/meta-pytorch/torchstore.git

      - name: Install dependencies
        run: python -m pip install --no-build-isolation -e ".[dev]"

      - name: Run unit tests with coverage
        # TODO add all tests
        env:
          # Keeps the preloaded libpython in step with the interpreter that
          # setup-miniconda installed for this matrix entry.
          MATRIX_PYTHON_VERSION: ${{ matrix.python-version }}
        run: |
          # `test` is the environment name given to setup-miniconda above.
          conda_env_lib="${CONDA}/envs/test/lib"
          # NOTE(review): presumably preloaded so native extensions can resolve
          # libpython symbols - confirm the exact reason.
          export LD_PRELOAD="${conda_env_lib}/libpython${MATRIX_PYTHON_VERSION}.so.1.0"
          # LD_LIBRARY_PATH entries are directories to search, not library
          # files; prepend the env's lib dir and keep any inherited value.
          export LD_LIBRARY_PATH="${conda_env_lib}${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
          pytest tests/unit_tests --cov=. --cov-report=xml --durations=20 -vv

      - name: Upload Coverage to Codecov
        uses: codecov/codecov-action@v3