feat(evaluation): Add lm-eval to Pruna Metrics #1298

Workflow file for this run

	# Workflow display name shown in the Actions UI.
	name: Code Quality & Tests

	# Token scopes granted to the jobs in this workflow.
	permissions:
	  contents: read
	  pull-requests: read
	  actions: write

	# Run on pushes to main and on pull requests that target main.
	on:
	  push:
	    branches:
	      - main
	  pull_request:
	    branches:
	      - main

	# Runs are grouped per repository and ref; starting a new run in a group
	# cancels the one still in progress.
	concurrency:
	  group: ci-${{ github.repository }}-tests-${{ github.ref }}
	  cancel-in-progress: true

	jobs:
	  # Static checks: ruff on changed Python files, the ty type checker on
	  # src/pruna, and the pytest tests marked "style".
	  linting:
	    runs-on: ubuntu-22.04
	    outputs:
	      # Written by the final step of this job (id: lint-check).
	      success: ${{ steps.lint-check.outputs.success }}

	    strategy:
	      matrix:
	        # Quoted so the version stays a string instead of being read as a float.
	        python-version: ["3.11"]

	    steps:
	      - name: Checkout code
	        uses: actions/checkout@v6

	      - name: Set up Python
	        uses: actions/setup-python@v5
	        with:
	          python-version: ${{ matrix.python-version }}

	      # Repository-local composite action.
	      - uses: ./.github/actions/setup-uv-project

	      - name: Get changed Python files
	        id: changed-files
	        uses: tj-actions/changed-files@v47
	        with:
	          # Any Python file at any depth ...
	          files: |
	            **/*.py
	          # ... except those under tests/.
	          files_ignore: |
	            tests/**

	      # Skipped entirely when no matching Python file changed.
	      - name: Run ruff on changed Pruna code
	        if: steps.changed-files.outputs.any_changed == 'true'
	        uses: astral-sh/ruff-action@v3
	        with:
	          version: "latest"
	          src: ${{ steps.changed-files.outputs.all_changed_files }}

	      - name: Run Ty type checker on Pruna code
	        run: uv run ty check src/pruna

	      - name: Run docstring checks with pytest
	        run: uv run pytest -m "style"

	      # No `if:` here, so this step only runs when the steps above succeeded.
	      - name: Set lint check output
	        id: lint-check
	        run: echo "success=true" >> "$GITHUB_OUTPUT"

	test:
	needs: linting
	runs-on: pruna-cpu

	strategy:
	matrix:
	python-version: ["3.11"]

	env:
	HF_TOKEN: ${{ secrets.HF_TOKEN }}
	HF_HOME: ${{ github.workspace }}/.cache/huggingface
	HF_DATASETS_CACHE: ${{ github.workspace }}/.cache/huggingface/datasets
	HUGGINGFACE_HUB_CACHE: ${{ github.workspace }}/.cache/huggingface/hub

	steps:
	- name: Checkout code
	uses: actions/checkout@v6

	- name: Set up Python
	uses: actions/setup-python@v5
	with:
	python-version: ${{ matrix.python-version }}

	- uses: ./.github/actions/setup-uv-project

	- name: Cache Hugging Face datasets and models
	uses: actions/cache@v5
	with:
	path: \|
	.cache/huggingface/datasets
	.cache/huggingface/hub
	.cache/huggingface/models
	key: hf-cache-${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('pyproject.toml') }}
	restore-keys: \|
	hf-cache-${{ runner.os }}-${{ matrix.python-version }}-
	hf-cache-${{ runner.os }}-

	- name: Export HF token for subprocesses
	run: \|
	echo "Exporting HUGGINGFACE_HUB_TOKEN so subprocesses inherit auth"
	echo "HUGGINGFACE_HUB_TOKEN=$HF_TOKEN" >> "$GITHUB_ENV"
	env:
	HF_TOKEN: ${{ secrets.HF_TOKEN }}

	- name: Clean incomplete Hugging Face cache files (if any)
	run: \|
	echo "Removing any *.incomplete directories from HF caches..."
	set -eux
	# datasets incomplete folders
	if [ -d "${{ github.workspace }}/.cache/huggingface/datasets" ]; then
	find "${{ github.workspace }}/.cache/huggingface/datasets" -type d -name "*.incomplete" -prune -exec rm -rf {} + \|\| true
	fi
	# hub incomplete folders
	if [ -d "${{ github.workspace }}/.cache/huggingface/hub" ]; then
	find "${{ github.workspace }}/.cache/huggingface/hub" -type d -name "*.incomplete" -prune -exec rm -rf {} + \|\| true
	fi
	# models incomplete folders
	if [ -d "${{ github.workspace }}/.cache/huggingface/models" ]; then
	find "${{ github.workspace }}/.cache/huggingface/models" -type d -name "*.incomplete" -prune -exec rm -rf {} + \|\| true
	fi

	- name: Set test worker count
	run: echo "PYTEST_WORKERS=$(( $(nproc) - 1 ))" >> "$GITHUB_ENV" # leave 1 for the system process

	- name: Run tests with pytest-rerunfailures
	run: \|
	echo "Running tests with up to 3 reruns on failure using $PYTEST_WORKERS workers..."
	uv run pytest -n $PYTEST_WORKERS -m "not (slow or style or high_cpu or cuda or distributed)" --reruns 3 --reruns-delay 10 --maxfail=1

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

feat(evaluation): Add lm-eval to Pruna Metrics #1298

Workflow file

feat(evaluation): Add lm-eval to Pruna Metrics #1298

Uh oh!

Workflow file for this run