# Workflow: "Add new model to benchmark" (issue/PR #296)
name: Add new model to benchmark

on:
  workflow_dispatch:
    inputs:
      model:
        description: "Model to benchmark"
        required: true
        default: "openai/gpt-4o"

# GITHUB_TOKEN needs explicit write scopes: this workflow pushes commits to
# main (failure tracking) and opens pull requests (benchmark results).
permissions:
  contents: write
  pull-requests: write

jobs:
  benchmark:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          token: ${{ secrets.GITHUB_TOKEN }}

      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '18'
          cache: 'npm'
          cache-dependency-path: src/package-lock.json

      - name: Install dependencies
        working-directory: src
        run: npm ci

      - name: Run benchmark
        id: run-benchmark
        working-directory: src
        # The model id is passed through env rather than interpolated with
        # ${{ }} inside the script, so a crafted input cannot inject shell.
        run: npm run benchmark -- --model="$MODEL" --debug
        env:
          MODEL: ${{ github.event.inputs.model }}
          TINYBIRD_API_HOST: ${{ vars.TINYBIRD_API_HOST }}
          TINYBIRD_WORKSPACE_TOKEN: ${{ secrets.WORKSPACE_TOKEN }}
          WORKSPACE_TOKEN: ${{ secrets.WORKSPACE_TOKEN }}
          OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}

      # Runs only when the benchmark step itself failed (not on other
      # step failures): records the model so it is not retried automatically.
      - name: Report benchmark failure
        if: failure() && steps.run-benchmark.outcome == 'failure'
        working-directory: src
        env:
          MODEL: ${{ github.event.inputs.model }}
        run: |
          PROVIDER=$(echo "$MODEL" | cut -d'/' -f1)
          MODEL_NAME=$(echo "$MODEL" | cut -d'/' -f2)
          echo "Benchmark failed for $MODEL"
          echo "Adding to failed models list..."
          # Add to failed models
          npm run manage-failed-models add "$PROVIDER" "$MODEL_NAME" "$MODEL" "Benchmark workflow failed"
          echo "Model $MODEL has been added to the failed list"

      - name: Commit failure changes
        if: failure() && steps.run-benchmark.outcome == 'failure'
        env:
          MODEL: ${{ github.event.inputs.model }}
        run: |
          git config --local user.email "action@github.com"
          git config --local user.name "GitHub Action"
          # `git status --porcelain` also reports untracked and staged files,
          # which `git diff --quiet` would miss (e.g. a freshly created
          # failed-models.json).
          if [ -z "$(git status --porcelain)" ]; then
            echo "No changes to commit"
          else
            git add src/untested-models.json src/failed-models.json
            git commit -m "Add failed model $MODEL to tracking lists

          - Model: $MODEL
          - Reason: Benchmark workflow failed
          - Run ID: ${{ github.run_id }}

          This prevents the model from being automatically benchmarked again."
            git push origin main
            echo "Failure changes committed and pushed to main branch"
          fi

      - name: Create normalized branch name
        id: branch-name
        env:
          MODEL: ${{ github.event.inputs.model }}
        run: |
          # Replace path separators, whitespace, and punctuation with '-' so
          # the model id is a valid git branch-name component.
          # POSIX [[:space:]] replaces GNU-only \s; '|' delimiter avoids
          # escaping the '/' inside the bracket expression.
          NORMALIZED_MODEL=$(echo "$MODEL" | sed 's|[/[:space:]_.:]|-|g')
          echo "branch_name=benchmark/$NORMALIZED_MODEL-${{ github.run_id }}" >> "$GITHUB_OUTPUT"

      - name: Create Pull Request
        id: create-pr
        uses: peter-evans/create-pull-request@v6
        with:
          token: ${{ secrets.GITHUB_TOKEN }}
          commit-message: "feat: add benchmark results for ${{ github.event.inputs.model }}"
          title: "Add benchmark results for ${{ github.event.inputs.model }}"
          body: |
            This PR adds benchmark results for the **${{ github.event.inputs.model }}** model.

            The following files have been updated:
            - `src/benchmark/results.json` - Raw benchmark results
            - `src/benchmark/validation-results.json` - Validation results against human baseline

            This PR was automatically generated by the benchmark workflow.

            **Note:** If you don't want to merge this PR, close it and the model will be added to the untested list to prevent re-processing.

            @alrocar
          branch: ${{ steps.branch-name.outputs.branch_name }}
          base: main
          delete-branch: true

      # Best-effort check: if the PR is already closed-unmerged shortly after
      # creation, leave an explanatory comment. Errors are logged, not fatal.
      - name: Monitor PR status
        if: steps.create-pr.outputs.pull-request-number
        uses: actions/github-script@v7
        env:
          MODEL: ${{ github.event.inputs.model }}
        with:
          script: |
            const prNumber = ${{ steps.create-pr.outputs.pull-request-number }};
            // Read via env to avoid injecting the raw input into JS source.
            const model = process.env.MODEL;
            // Wait a bit for the PR to be created
            await new Promise(resolve => setTimeout(resolve, 5000));
            try {
              const pr = await github.rest.pulls.get({
                owner: context.repo.owner,
                repo: context.repo.repo,
                pull_number: prNumber
              });
              if (pr.data.state === 'closed' && !pr.data.merged_at) {
                console.log(`PR #${prNumber} was closed without merging for model ${model}`);
                console.log('This model will be added to the untested list to prevent re-processing');
                // Create a comment on the closed PR
                await github.rest.issues.createComment({
                  owner: context.repo.owner,
                  repo: context.repo.repo,
                  issue_number: prNumber,
                  body: `This model has been added to the untested list to prevent automatic re-processing.`
                });
              }
            } catch (error) {
              console.log('Could not check PR status:', error.message);
            }