Skip to content

[parked] Use gpu runner in CI #450

[parked] Use gpu runner in CI

[parked] Use gpu runner in CI #450

Workflow file for this run

# Unit-test workflow.
#
# On every pull request this runs the unit tests on a GPU runner, once per
# Python version in the matrix, and then uploads each coverage report to
# Codecov from a separate lightweight job.
name: Unit Test

on:
  pull_request:

jobs:
  # Runs the test script through the pytorch/test-infra reusable Linux job.
  build-and-test:
    strategy:
      matrix:
        # Versions are quoted so that 3.10 is not read as the float 3.1.
        python-version: ['3.10', '3.11', '3.12']
    # adapted from torchtitan/.github/workflows/integration_test_8gpu_h100.yaml
    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
    secrets: inherit  # Pass all secrets
    with:
      runner: linux.aws.h100.8
      gpu-arch-type: cuda
      gpu-arch-version: "12.9"
      docker-image: pytorch/almalinux-builder:cuda12.9-main
      repository: meta-pytorch/forge
      # NOTE(review): the reusable job is expected to export every secret
      # listed here into the script environment as SECRET_<NAME>; confirm
      # against the linux_job.yml inputs in pytorch/test-infra.
      secrets-env: FORGE_GITHUB_CI_FOR_TORCHSTORE
      # One artifact per matrix leg: legs sharing a single artifact name
      # would collide when they upload.
      upload-artifact: "coverage-report-${{ matrix.python-version }}"
      script: |
        set -eux

        # Log CUDA driver version for debugging.
        DRIVER_VERSION=$(nvidia-smi --query-gpu=driver_version --format=csv,noheader | head -n 1 || true)
        echo "CUDA driver version: ${DRIVER_VERSION}"

        conda create -n test python="${{ matrix.python-version }}" -y
        conda activate test
        pip config --user set global.progress_bar off

        # Upgrade pip
        python -m pip install --upgrade pip

        # Install pytorch
        python -m pip install --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cu129

        # Install monarch
        python -m pip install monarch-no-torch==0.1.0.dev20250826 --find-links assets/ci

        # Install torchstore (private repository, fetched over SSH).
        eval "$(ssh-agent -s)"
        # Double quotes are required so the shell expands the variable; with
        # single quotes ssh-add would receive the variable name as literal text.
        # Tracing is paused so the key cannot end up in the xtrace output.
        set +x
        ssh-add - <<< "${SECRET_FORGE_GITHUB_CI_FOR_TORCHSTORE}"
        set -x
        python -m pip install git+ssh://git@github.com/meta-pytorch/torchstore.git

        # Install torchtitan
        python -m pip install --pre torchtitan --index-url https://download.pytorch.org/whl/nightly/cu129

        # Install dependencies
        python -m pip install --no-build-isolation -e ".[dev]"

        # Run unit tests with coverage.
        # The report is written into the directory the reusable job uploads
        # artifacts from; if that variable is unset it falls back to the
        # working directory.
        COVERAGE_XML="${RUNNER_ARTIFACT_DIR:-.}/coverage.xml"
        pytest tests/unit_tests --cov=. "--cov-report=xml:${COVERAGE_XML}" --durations=20 -vv

  # Fetches the coverage artifact produced for each Python version and sends
  # it to Codecov.
  upload-coverage:
    strategy:
      matrix:
        python-version: ['3.10', '3.11', '3.12']
    needs: build-and-test
    runs-on: ubuntu-latest
    steps:
      - name: Download coverage artifact
        uses: actions/download-artifact@v4
        with:
          # Must match the per-version name used by build-and-test.
          name: "coverage-report-${{ matrix.python-version }}"
      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@v4
        with:
          files: coverage.xml
          token: ${{ secrets.CODECOV_TOKEN }}