[wip] shared memory optimization for policy weight update #108
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# GPU test workflow: sets up a conda env on a GPU runner, installs the pinned
# torch / vLLM / Monarch stack plus this package, then runs the unit tests
# with coverage and uploads the report to Codecov.
name: GPU tests

on:
  schedule:
    # Runs at midnight every day
    - cron: '0 0 * * *'
  push:
    branches: [main]
  pull_request:
  workflow_dispatch:

# On main the group key includes the run number, so every run gets its own
# group and is never cancelled; on any other ref the key is the ref itself,
# so a newer run for the same ref cancels the one in progress.
concurrency:
  group: gpu-test-${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_number || github.ref }}
  cancel-in-progress: true

permissions:
  id-token: write
  contents: read

defaults:
  run:
    # Login shell (-l) with errexit and pipefail enabled for every `run` step.
    shell: bash -l -eo pipefail {0}

jobs:
  gpu_test:
    # Only run in the upstream organization, not in forks.
    if: github.repository_owner == 'meta-pytorch'
    runs-on: linux.g5.12xlarge.nvidia.gpu
    strategy:
      matrix:
        # Quoted so YAML does not read 3.10 as the float 3.1.
        python-version: ['3.10']
    steps:
      - name: Check out repo
        uses: actions/checkout@v4
      - name: Setup conda env
        uses: conda-incubator/setup-miniconda@v2
        with:
          auto-update-conda: true
          miniconda-version: "latest"
          activate-environment: test
          python-version: ${{ matrix.python-version }}
      - name: Update pip
        run: python -m pip install --upgrade pip
      - name: Install pinned torch nightly
        run: python -m pip install --pre torch==2.9.0.dev20250905 --no-cache-dir --index-url https://download.pytorch.org/whl/nightly/cu129
      - name: Download and install vLLM and its dependencies
        # TODO: this honestly could not be hackier if I tried
        run: |
          python -m pip install -r .github/packaging/vllm_reqs.txt
          python -m pip install vllm==0.10.1.dev0+g6d8d0a24c.d20251009.cu129 --no-cache-dir --index-url https://download.pytorch.org/whl/preview/forge
      - name: Install Monarch
        # Use `python -m pip` like the other install steps so the package is
        # installed into the interpreter that runs the tests.
        run: python -m pip install torchmonarch==0.1.0rc1
      - name: Install torchtitan and torchstore
        run: |
          python -m pip install git+https://github.com/pytorch/torchtitan.git
          python -m pip install git+https://github.com/meta-pytorch/torchstore.git
      - name: Install dependencies
        run: python -m pip install --no-build-isolation -e ".[dev]"
      - name: Run unit tests with coverage
        # TODO add all tests
        run: |
          # Preload the conda env's libpython into every process started below.
          export LD_PRELOAD=$CONDA/envs/test/lib/libpython3.10.so.1.0
          # LD_LIBRARY_PATH is a colon-separated list of directories, so add
          # the env's lib directory (not the .so file) and keep any value the
          # runner already set.
          export LD_LIBRARY_PATH="$CONDA/envs/test/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
          pytest tests/unit_tests --cov=. --cov-report=xml --durations=20 -vv
      - name: Upload Coverage to Codecov
        uses: codecov/codecov-action@v3