feat: add GoldenSwag task (#175) #1390
name: CI

on:
  push:
    branches: [main]
  pull_request:
    types: [opened, reopened, synchronize, labeled, unlabeled, edited]
  # Manually trigger a workflow for a branch
  workflow_dispatch:
  # Merge queue trigger
  merge_group:

permissions:
  contents: read

concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true
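  # A newer run in the same group cancels any run still in progress, so only the
  # latest commit of a PR (or of a pushed branch) keeps consuming runners.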

env:
  REGISTRY: registry.gitlab.aleph-alpha.de
  REPO_OWNER: research/public-registry
  IMAGE_NAME: eval_framework
  HF_DATASET_CACHE_DIR: /tmp/huggingface_datasets # <- single source of truth
  UV_LINK_MODE: symlink
  UV_LOCKED: 1
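  # UV_LOCKED=1 makes every `uv run` fail if uv.lock is out of date with pyproject.toml,
  # and UV_LINK_MODE=symlink links packages from the uv cache instead of copying them.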

jobs:
  lint-pr-title:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Skip PR title check (non-PR)
        if: github.event_name != 'pull_request'
        run: |
          echo "Not a PR, skipping PR title check"
      - name: Set up Python
        if: github.event_name == 'pull_request'
        uses: actions/setup-python@v4
        with:
          python-version: "3.11"
      - name: Install commitizen
        if: github.event_name == 'pull_request'
        run: |
          python -m pip install --upgrade pip
          pip install commitizen
      - name: Validate PR Title (only on PRs)
        if: github.event_name == 'pull_request'
        id: pr-check
        run: |
          cz check --message "${{ github.event.pull_request.title }}"
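      # With the default commitizen rules this enforces Conventional Commits, so a PR
      # title like "feat: add GoldenSwag task (#175)" passes while a bare
      # "Add GoldenSwag task" would fail the check.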

  lint:
    runs-on: ubuntu-latest # default runner runs out of disk space due to hf cache
    needs: [lint-pr-title]
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v4
      - name: Setup uv
        uses: astral-sh/setup-uv@v6
        with:
          version: "~=0.8.16"
      - name: Run Pre-Commit
        run: uvx pre-commit run --all-files
      - name: Dependency check
        run: ./utils/dependency_check.sh
      - name: Run MyPy
        run: uv run --all-extras mypy

  hf-datasets-cache:
    runs-on: cpu-runner-8c-32gb-01 # default runner runs out of disk space, unfortunately
    steps:
      - uses: actions/checkout@v4
      - name: Setup uv
        uses: astral-sh/setup-uv@v6
        with:
          version: "~=0.8.16"
      - name: Restore and save cache
        uses: actions/cache@v4
        with:
          path: ${{ env.HF_DATASET_CACHE_DIR }}
          key: hf-datasets-${{ github.run_id }}
          restore-keys: |
            hf-datasets-
      - name: Download only updated datasets
        env:
          HF_TOKEN: ${{ secrets.HUGGINGFACE_API_KEY }}
        run: |
          echo "Updating changed datasets"
          uv run --extra=comet --extra=openai python tests/tests_eval_framework/utils/update_datasets.py update
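      # The cache key embeds ${{ github.run_id }}, so every run saves a fresh snapshot of
      # HF_DATASET_CACHE_DIR; the `hf-datasets-` restore-key lets this job and the later
      # test jobs fall back to the most recently saved snapshot when no exact key matches.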

  tag:
    # Set Docker Tag and Image Name for Docker Build and Push (GPU Runs)
    runs-on: ubuntu-latest
    outputs:
      tag: ${{ steps.set-tag.outputs.tag }}
      image: ${{ steps.set-tag.outputs.image }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Set Tag
        id: set-tag
        run: |
          if [ "${{ github.ref }}" == "refs/heads/main" ]; then
            TAG='latest'
          else
            # head_ref is the correct branch name for PRs
            BRANCH_NAME=${{ github.head_ref || github.ref_name }}
            # Replace slashes and other invalid characters with hyphens and truncate to a valid Docker tag
            TAG=$(echo "${BRANCH_NAME}" | sed 's/[^a-zA-Z0-9._-]/-/g' | cut -c1-20)
          fi
          echo "tag=$TAG" >> $GITHUB_OUTPUT
          echo "image=${{ env.REGISTRY }}/${{ env.REPO_OWNER }}/${{ env.IMAGE_NAME }}:$TAG" >> $GITHUB_OUTPUT
      - name: Output Docker Tag
        run: |
          echo "Docker Tag: ${{ steps.set-tag.outputs.tag }}"
          echo "Docker image: ${{ steps.set-tag.outputs.image }}"

  build:
    # Build and Push Docker Image (GPU Runs)
    needs: [lint, tag]
    runs-on: cpu-runner-8c-32gb-01
    container: docker:dind
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v4
      - name: Registry Authentication
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: token
          password: ${{ secrets.GL_PUBLIC_REGISTRY_READ_WRITE_TOKEN }}
      - name: Setup Docker BuildX
        uses: docker/setup-buildx-action@v3
      - name: Build and Push Image
        uses: docker/build-push-action@v6
        with:
          context: .
          file: Dockerfile
          push: true
          tags: ${{ needs.tag.outputs.image }}
          build-args: BUILDKIT_INLINE_CACHE=1
          cache-from: |
            type=gha,scope=shared
            type=registry,ref=${{ env.REGISTRY }}/${{ env.REPO_OWNER }}/${{ env.IMAGE_NAME }}:buildcache
            type=registry,ref=${{ env.REGISTRY }}/${{ env.REPO_OWNER }}/${{ env.IMAGE_NAME }}:latest
            type=registry,ref=${{ env.REGISTRY }}/${{ env.REPO_OWNER }}/${{ env.IMAGE_NAME }}:${{ needs.tag.outputs.tag }}
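          # Layer cache can be pulled from any of these sources: the shared GitHub Actions
          # cache, the dedicated :buildcache image, :latest, and the image previously pushed
          # for this branch tag, so rebuilds reuse layers even on a cold runner.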

  test-extras:
    # Test uv installs (CPU)
    runs-on: ubuntu-latest
    needs: [lint]
    strategy:
      fail-fast: false
      matrix:
        extras: ['', 'determined', 'api', 'openai', 'transformers', 'accelerate', 'comet', 'optional']
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v4
      - name: Setup uv
        uses: astral-sh/setup-uv@v6
        with:
          version: "~=0.8.16"
      - name: Verify install and functionality via uv --exact
        run: |
          if [ "${{ matrix.extras }}" != "" ]; then
            echo "Testing extra: ${{ matrix.extras }}"
            uv run --exact --extra ${{ matrix.extras }} pytest -v --noconftest tests/tests_eval_framework/installs/test_${{ matrix.extras }}.py
          else
            echo "Testing core install"
            uv run --exact pytest --noconftest -v tests/tests_eval_framework/installs/test_core.py
          fi
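      # `--exact` makes uv sync the environment to exactly the requested extras (removing
      # anything else), so each matrix entry such as 'openai' is verified in isolation
      # against tests/tests_eval_framework/installs/test_openai.py; the empty entry covers
      # the core install without extras.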

  test-cpu:
    runs-on: cpu-runner-8c-32gb-01
    container: derskythe/github-runner-base:ubuntu-noble
    needs: [hf-datasets-cache, test-extras]
    steps:
      - uses: actions/checkout@v4
      - name: Setup uv
        uses: astral-sh/setup-uv@v6
        with:
          version: "~=0.8.16"
      - name: Huggingface datasets cache
        uses: actions/cache/restore@v4
        with:
          path: ${{ env.HF_DATASET_CACHE_DIR }} # <- shared path
          key: hf-datasets-${{ github.run_id }}
          restore-keys: |
            hf-datasets-
      - name: Run tests
        env:
          HF_TOKEN: ${{ secrets.HUGGINGFACE_API_KEY }}
        run: |
          uv run --all-extras python -c "import nltk; nltk.download('punkt_tab')"
          uv run --all-extras pytest -n auto --max-worker-restart=0 --durations=30 -v -m "not gpu and not cpu_slow and not external_api and not formatter_hash"
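      # The marker expressions partition the suite: this job runs the fast CPU tests,
      # test-cpu-slow selects "cpu_slow", test-formatter-hash selects "formatter_hash",
      # and test-docker-gpu covers "gpu" and "vllm"; tests marked "external_api" are
      # excluded from the CPU and GPU runs.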

  test-cpu-slow:
    runs-on: cpu-runner-8c-32gb-01
    container: derskythe/github-runner-base:ubuntu-noble
    needs: [hf-datasets-cache, test-extras]
    steps:
      - uses: actions/checkout@v4
      - name: Setup uv
        uses: astral-sh/setup-uv@v6
        with:
          version: "~=0.8.16"
      - name: Huggingface datasets cache
        uses: actions/cache/restore@v4
        with:
          path: ${{ env.HF_DATASET_CACHE_DIR }}
          key: hf-datasets-${{ github.run_id }}
          restore-keys: |
            hf-datasets-
      - name: Run tests
        env:
          HF_TOKEN: ${{ secrets.HUGGINGFACE_API_KEY }}
        run: |
          uv run --all-extras python -c "import nltk; nltk.download('punkt_tab')"
          uv run --all-extras pytest -n auto --max-worker-restart=0 --durations=30 -v -m "not gpu and cpu_slow and not external_api and not formatter_hash"

  test-formatter-hash:
    runs-on: cpu-runner-8c-32gb-01
    container: derskythe/github-runner-base:ubuntu-noble
    needs: [hf-datasets-cache, test-extras]
    steps:
      - uses: actions/checkout@v4
      - name: Setup uv
        uses: astral-sh/setup-uv@v6
        with:
          version: "~=0.8.16"
      - name: Huggingface datasets cache
        uses: actions/cache/restore@v4
        with:
          path: ${{ env.HF_DATASET_CACHE_DIR }}
          key: hf-datasets-${{ github.run_id }}
          restore-keys: |
            hf-datasets-
      - name: Run formatter hashing tests
        env:
          HF_TOKEN: ${{ secrets.HUGGINGFACE_API_KEY }}
        run: |
          uv run --all-extras python -c "import nltk; nltk.download('punkt_tab')"
          uv run --all-extras pytest -n auto --max-worker-restart=0 --durations=30 -v -m "formatter_hash"

  test-docker-gpu:
    # Run full test suite in Docker Container with GPU
    runs-on: EvalFrameworkGPURunner
    needs: [tag, build, test-cpu, test-cpu-slow, test-formatter-hash]
    container:
      image: "${{ needs.tag.outputs.image }}"
      credentials:
        username: token
        password: ${{ secrets.GL_PUBLIC_REGISTRY_READ_WRITE_TOKEN }}
      options: --gpus all
    defaults:
      run:
        working-directory: /eval_framework
    steps:
      - name: Verify GPU installs via uv --exact
        run: |
          set -e # fail fast if any test fails
          echo "Testing vllm extra"
          uv run --exact --extra vllm pytest -v --noconftest tests/tests_eval_framework/installs/test_vllm.py
          echo "Testing mistral extra"
          uv run --exact --extra mistral pytest -v --noconftest tests/tests_eval_framework/installs/test_mistral.py
          echo "Testing all extras together"
          uv run --exact --all-extras pytest -v --noconftest tests/tests_eval_framework/installs/
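      # Note that this job has no checkout step: commands run against the source baked
      # into the freshly built branch image at /eval_framework (the `working-directory`
      # default above), which is what the build job pushed.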
      - name: Huggingface datasets cache
        uses: actions/cache/restore@v4
        with:
          path: ${{ env.HF_DATASET_CACHE_DIR }}
          key: hf-datasets-${{ github.run_id }}
          restore-keys: |
            hf-datasets-
      - name: Test GPU
        timeout-minutes: 20
        env:
          HF_TOKEN: ${{ secrets.HUGGINGFACE_API_KEY }}
        run: uv run --all-extras pytest -n auto --max-worker-restart=0 --durations=30 -v -m "gpu and not cpu_slow and not external_api and not vllm"
      - name: Test VLLM
        timeout-minutes: 20
        env:
          HF_TOKEN: ${{ secrets.HUGGINGFACE_API_KEY }}
          VLLM_LOGGING_LEVEL: DEBUG
          VLLM_WORKER_MULTIPROC_METHOD: spawn
          VLLM_USE_MODELSCOPE: False
          VLLM_NCCL_SO_PATH: ""
          VLLM_USE_TRITON_FLASH_ATTN: 0
          VLLM_DISABLE_CUSTOM_ALL_REDUCE: 1
        run: pytest --log-cli-level=INFO -v -m "vllm"
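        # The vLLM tests call pytest directly (no `uv run`), presumably because the image's
        # default environment already provides the vllm extra. VLLM_WORKER_MULTIPROC_METHOD=spawn
        # avoids CUDA re-initialisation problems in forked worker processes; the remaining
        # VLLM_* toggles keep logging verbose and disable optional code paths, presumably to
        # reduce flakiness on the CI GPU runner.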