Skip to content

Add flash attn backend, duplicates #314 #650

Add flash attn backend, duplicates #314

Add flash attn backend, duplicates #314 #650

Workflow file for this run

# PR Test: installs the package with its "fa" extra inside a pinned SGLang
# container on a self-hosted runner and runs the flash-attention test script.
name: PR Test

on:
  pull_request:
    branches: [main]
    # The default activity types (opened, synchronize, reopened) do not include
    # ready_for_review. Because the job below skips draft pull requests, a PR
    # converted from draft to ready would otherwise get no run until its next
    # push.
    types: [opened, synchronize, reopened, ready_for_review]
  workflow_dispatch:

# One active run per ref; a newer run cancels the one still in progress.
concurrency:
  group: pr-test-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: read

jobs:
  unit-test:
    # Runs for any event in sgl-project/SpecForge and for pull_request events
    # in any other repository; draft pull requests are skipped.
    # NOTE(review): on workflow_dispatch there is no pull_request payload, so
    # the draft check relies on GitHub's loose comparison (null == false).
    if: >-
      (github.repository == 'sgl-project/SpecForge' || github.event_name == 'pull_request') &&
      github.event.pull_request.draft == false
    runs-on: [self-hosted]
    container:
      # Locked to this version to avoid repeated docker pulls.
      image: lmsysorg/sglang:v0.5.5
      options: --gpus all --shm-size=2g --rm -v /dev/shm
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      # Copy the `cache` directory and the `sf` virtualenv from /github/home
      # into the workspace when they exist and are non-empty.
      - name: Restore cache
        run: |
          if [ -d /github/home/cache ] && [ -n "$(ls -A /github/home/cache/)" ]; then
            cp -p -r /github/home/cache ./
          fi
          if [ -d /github/home/sf ] && [ -n "$(ls -A /github/home/sf/)" ]; then
            cp -p -r /github/home/sf ./
          fi

      # Needed so that flashinfer JIT compilation does not make the program hang.
      - name: Remove flashinfer
        run: |
          rm -rf /github/home/.cache/flashinfer

      - name: Install dependencies
        shell: bash
        run: |
          # Create the sf venv if it does not exist yet.
          if [ ! -d sf ]; then
            uv venv sf -p 3.11
          fi
          source sf/bin/activate
          uv pip install setuptools
          MAX_JOBS=8 uv pip install -v ".[fa]" --prerelease=allow --no-build-isolation

      - name: Run test
        timeout-minutes: 30
        shell: bash
        run: |
          source sf/bin/activate
          uv pip list
          export PYTHONPATH=$PWD
          python tests/test_utils/test_flash_attention.py

      # Copy the virtualenv (and `cache`, when present) back to /github/home,
      # where the "Restore cache" step looks for them.
      - name: Save cache
        run: |
          cp -p -r sf /github/home/
          # `cache` is treated as optional on restore; do not fail a green run
          # just because it is absent from the workspace.
          if [ -d cache ]; then
            cp -p -r cache /github/home/
          fi