# Add flash attn backend, duplicates #314 #650

	name: PR Test

	on:
	pull_request:
	branches: [ main ]
	workflow_dispatch:

	concurrency:
	group: pr-test-${{ github.ref }}
	cancel-in-progress: true

	permissions:
	contents: read

	jobs:
	unit-test:
	if: (github.repository == 'sgl-project/SpecForge' \|\| github.event_name == 'pull_request') &&
	github.event.pull_request.draft == false
	runs-on: [self-hosted]
	container:
	image: lmsysorg/sglang:v0.5.5 # we lock to this version to avoid repeated docker pull
	options: --gpus all --shm-size=2g --rm -v /dev/shm
	steps:
	- name: Checkout code
	uses: actions/checkout@v4

	- name: Restore cache
	run: \|
	if [ -d /github/home/cache ] && [ ! -z "$(ls -A /github/home/cache/)" ]; then
	cp -p -r /github/home/cache ./
	fi

	if [ -d /github/home/sf ] && [ ! -z "$(ls -A /github/home/sf/)" ]; then
	cp -p -r /github/home/sf ./
	fi

	- name: Remove flashinfer # this is needed to avoid flashinfer jit compilation makes the program hang
	run: \|
	rm -rf /github/home/.cache/flashinfer

	- name: Install dependencies
	shell: bash
	run: \|
	# if sf venv does not exist, create it
	if [ ! -d sf ]; then
	uv venv sf -p 3.11
	fi
	source sf/bin/activate
	uv pip install setuptools
	MAX_JOBS=8 uv pip install -v ".[fa]" --prerelease=allow --no-build-isolation

	- name: Run test
	timeout-minutes: 30
	shell: bash
	run: \|
	source sf/bin/activate
	uv pip list
	export PYTHONPATH=$PWD
	python tests/test_utils/test_flash_attention.py

	- name: Save cache
	run: \|
	cp -p -r sf /github/home/
	cp -p -r cache /github/home/

# Provide feedback