[parked] Use gpu runner in CI #451
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Runs the unit-test suite on a GPU runner for every pull request, once per
# supported Python version, then forwards each coverage report to Codecov.
name: Unit Test

on:
  pull_request:

jobs:
  # Creates a conda environment, installs the project and its dependencies,
  # and runs pytest with coverage through the reusable test-infra Linux job.
  build-and-test:
    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
    secrets: inherit  # Pass all secrets
    strategy:
      fail-fast: false
      matrix:
        python-version: ['3.10', '3.11', '3.12']
        # This entry has no python-version key, so its values are added to
        # every Python version combination rather than creating a new one.
        include:
          - name: 4xlargegpu
            runs-on: linux.g5.4xlarge.nvidia.gpu
            torch-spec: '--pre torch --index-url https://download.pytorch.org/whl/nightly/cu126'
            gpu-arch-type: "cuda"
            gpu-arch-version: "12.6"
    # adapted from torchtitan/.github/workflows/integration_test_8gpu_h100.yaml
    with:
      timeout: 120
      runner: ${{ matrix.runs-on }}
      gpu-arch-type: ${{ matrix.gpu-arch-type }}
      gpu-arch-version: ${{ matrix.gpu-arch-version }}
      submodules: recursive
      # One artifact per matrix leg: artifact names must be unique within a
      # workflow run, and upload-coverage downloads them by the same name.
      upload-artifact: "coverage-report-${{ matrix.python-version }}"
      script: |
        set -eux

        # Log CUDA driver version for debugging.
        DRIVER_VERSION=$(nvidia-smi --query-gpu=driver_version --format=csv,noheader | head -n 1 || true)
        echo "CUDA driver version: ${DRIVER_VERSION}"

        conda create -n test python="${{ matrix.python-version }}" -y
        conda activate test
        pip config --user set global.progress_bar off

        # Upgrade pip
        python -m pip install --upgrade pip

        # Install pytorch
        python -m pip install ${{ matrix.torch-spec }}

        # Install monarch
        python -m pip install monarch-no-torch==0.1.0.dev20250826 --find-links assets/ci

        # Install torchstore (private repository, fetched over SSH).
        eval "$(ssh-agent -s)"
        # Double quotes so the shell expands the variable; single quotes would
        # hand ssh-add the literal text "${FORGE_GITHUB_CI_FOR_TORCHSTORE}".
        # Tracing is paused so the key cannot end up in the job log.
        # NOTE(review): this relies on the reusable workflow exporting the
        # secret under exactly this variable name - confirm.
        set +x
        ssh-add - <<< "${FORGE_GITHUB_CI_FOR_TORCHSTORE}"
        set -x
        python -m pip install git+ssh://git@github.com/meta-pytorch/torchstore.git

        # Install torchtitan
        # NOTE(review): this index is cu129 while torch above comes from
        # cu126 - confirm the mismatch is intentional.
        python -m pip install --pre torchtitan --index-url https://download.pytorch.org/whl/nightly/cu129

        # Install dependencies
        python -m pip install --no-build-isolation -e ".[dev]"

        # Run unit tests with coverage
        # NOTE(review): coverage.xml is written to the working directory;
        # confirm the reusable job's artifact upload picks it up from there.
        pytest tests/unit_tests --cov=. --cov-report=xml --durations=20 -vv

  # Fetches the coverage artifact of each Python version and sends it to
  # Codecov. Runs only after every build-and-test leg has finished.
  upload-coverage:
    needs: build-and-test
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ['3.10', '3.11', '3.12']
    steps:
      - name: Download coverage artifact
        uses: actions/download-artifact@v4
        with:
          name: coverage-report-${{ matrix.python-version }}
      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@v4
        with:
          files: coverage.xml
          token: ${{ secrets.CODECOV_TOKEN }}