# GPU CI, run #2: workflow file for this run

name: GPU tests

# Triggers: nightly schedule, pushes to main, every pull request, and manual
# dispatch from the Actions UI.
on:
  schedule:
    # Every day at 00:00.
    - cron: '0 0 * * *'
  push:
    branches:
      - main
  pull_request:
  workflow_dispatch:

# On main the group key includes the run number, so each run gets its own
# group; on any other ref the key is the ref itself, so a newer run on that
# ref cancels the one still in progress.
concurrency:
  group: gpu-test-${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_number || github.ref }}
  cancel-in-progress: true

permissions:
  id-token: write
  contents: read

# Every `run:` step uses a login shell (-l) that exits on the first error (-e)
# and treats a failure anywhere in a pipeline as a failure (-o pipefail).
defaults:
  run:
    shell: bash -l -eo pipefail {0}
jobs:
  # Sets up a conda environment, installs torch, torchtitan, torchstore,
  # monarch and vLLM, runs the unit tests with coverage, then uploads the
  # coverage report to Codecov.
  gpu_test:
    # Only runs when the repository owner is meta-pytorch.
    if: github.repository_owner == 'meta-pytorch'
    runs-on: linux.g5.12xlarge.nvidia.gpu
    strategy:
      matrix:
        # Kept quoted so YAML reads "3.10" as a string, not the float 3.1.
        python-version: ['3.10']
    steps:
      - name: Check out repo
        uses: actions/checkout@v4
      - name: Setup conda env
        uses: conda-incubator/setup-miniconda@v2
        with:
          auto-update-conda: true
          miniconda-version: "latest"
          activate-environment: test
          python-version: ${{ matrix.python-version }}
      - name: Update pip
        run: python -m pip install --upgrade pip
      - name: Install torch nightly
        # PyTorch wheel indexes live under /whl/<variant>; without that path
        # segment the extra index does not resolve to a package index.
        # NOTE(review): despite the step name, no --pre flag or nightly index
        # is used here, so a stable release is resolved - confirm intent.
        run: python -m pip install torch --extra-index-url https://download.pytorch.org/whl/cu128
      - name: Install torchtitan and torchstore
        run: python -m pip install torchtitan torchstore
      - name: Download and install monarch
        run: python -m pip install monarch --index-url https://download.pytorch.org/whl/preview/forge
      - name: Download and install vLLM
        run: python -m pip install vllm --index-url https://download.pytorch.org/whl/preview/forge
      - name: Install test dependencies
        # The test step below invokes pytest with --cov flags (pytest-cov);
        # no earlier step installs either package explicitly.
        run: python -m pip install pytest pytest-cov
      - name: Run unit tests with coverage
        # TODO add all tests
        run: pytest tests/unit_tests --cov=. --cov-report=xml --durations=20 -vv
      - name: Upload Coverage to Codecov
        uses: codecov/codecov-action@v3