# Auto-discover new OpenRouter models (#165)
name: Auto-discover new OpenRouter models

on:
  schedule:
    # Run every 6 hours to check for new models
    - cron: '0 */6 * * *'
  workflow_dispatch:
    inputs:
      dry_run:
        description: "Dry run mode - don't actually run benchmarks"
        required: false
        default: false
        type: boolean
      max_models:
        description: "Maximum number of new models to benchmark (0 = no limit)"
        required: false
        default: "0"
        type: string
      force_run:
        description: "Force run even if no new models found (for testing)"
        required: false
        default: false
        type: boolean
jobs:
  discover-new-models:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          token: ${{ secrets.GITHUB_TOKEN }}

      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '18'
          cache: 'npm'
          cache-dependency-path: src/package-lock.json

      - name: Install dependencies
        working-directory: src
        run: npm ci
- name: Check for new models
id: find-new-models
working-directory: src
run: |
# Run the check-new-models script
npm run check-new-models || {
# Script exits with code 1 when new models are found
if [ $? -eq 1 ]; then
# Read the new models JSON file
if [ -f "new-models.json" ]; then
NEW_MODELS=$(node -e "
const fs = require('fs');
const data = JSON.parse(fs.readFileSync('new-models.json', 'utf8'));
console.log(data.models.map(m => m.provider + '/' + m.model).join(' '));
")
NEW_COUNT=$(node -e "
const fs = require('fs');
const data = JSON.parse(fs.readFileSync('new-models.json', 'utf8'));
console.log(data.total_count);
")
echo "new_models=$NEW_MODELS" >> $GITHUB_OUTPUT
echo "new_count=$NEW_COUNT" >> $GITHUB_OUTPUT
echo "Found $NEW_COUNT new models: $NEW_MODELS"
else
echo "new_models=" >> $GITHUB_OUTPUT
echo "new_count=0" >> $GITHUB_OUTPUT
fi
else
echo "Error running check-new-models script"
echo "new_models=" >> $GITHUB_OUTPUT
echo "new_count=0" >> $GITHUB_OUTPUT
fi
}
- name: Check for existing PRs and run benchmarks
if: steps.find-new-models.outputs.new_count > 0 && github.event.inputs.dry_run != 'true'
env:
GH_TOKEN: ${{ github.token }}
run: |
NEW_MODELS="${{ steps.find-new-models.outputs.new_models }}"
MAX_MODELS="${{ github.event.inputs.max_models || '0' }}"
echo "Checking for existing PRs and running benchmarks for new models: $NEW_MODELS"
echo "Max models limit: $MAX_MODELS"
# Convert to array and apply limit if specified
MODELS_ARRAY=($NEW_MODELS)
if [ "$MAX_MODELS" != "0" ] && [ "$MAX_MODELS" -gt 0 ]; then
MODELS_ARRAY=("${MODELS_ARRAY[@]:0:$MAX_MODELS}")
echo "Limited to first $MAX_MODELS models: ${MODELS_ARRAY[*]}"
fi
SKIPPED_COUNT=0
TRIGGERED_COUNT=0
for model in "${MODELS_ARRAY[@]}"; do
echo "=== Checking $model ==="
# Check if there's already an open PR for this model
EXISTING_PR=$(gh pr list --state open --search "Add benchmark results for $model" --json number --jq '.[0].number' || echo "")
if [ -n "$EXISTING_PR" ] && [ "$EXISTING_PR" != "null" ]; then
echo "⚠️ Open PR #$EXISTING_PR already exists for $model - skipping"
SKIPPED_COUNT=$((SKIPPED_COUNT + 1))
continue
fi
echo "✅ No existing PR found for $model - triggering benchmark"
# Trigger the benchmark-append workflow for this model
gh workflow run benchmark-append.yml \
--ref main \
-f model="$model" \
|| echo "Failed to trigger workflow for $model"
TRIGGERED_COUNT=$((TRIGGERED_COUNT + 1))
# Add a small delay between triggers
sleep 5
done
echo "📊 Summary:"
echo " - Models checked: ${#MODELS_ARRAY[@]}"
echo " - Benchmarks triggered: $TRIGGERED_COUNT"
echo " - Skipped (existing PRs): $SKIPPED_COUNT"
- name: Log discovery results
if: steps.find-new-models.outputs.new_count > 0 || github.event.inputs.force_run == 'true'
run: |
NEW_COUNT="${{ steps.find-new-models.outputs.new_count }}"
NEW_MODELS="${{ steps.find-new-models.outputs.new_models }}"
DRY_RUN="${{ github.event.inputs.dry_run }}"
MAX_MODELS="${{ github.event.inputs.max_models || '0' }}"
FORCE_RUN="${{ github.event.inputs.force_run }}"
if [ "$NEW_COUNT" -gt 0 ]; then
echo "🤖 Auto-discovery: Found $NEW_COUNT new OpenRouter models"
echo "Models: $NEW_MODELS"
if [ "$DRY_RUN" = "true" ]; then
echo "DRY RUN MODE - No benchmarks were triggered"
else
echo "Benchmarks triggered for each model"
if [ "$MAX_MODELS" != "0" ]; then
echo "Limited to $MAX_MODELS models"
fi
fi
elif [ "$FORCE_RUN" = "true" ]; then
echo "🤖 Auto-discovery: Manual run completed (no new models found)"
echo "All OpenRouter models are either tested or in the untested list"
echo "Mode: $([ "$DRY_RUN" = "true" ] && echo "Dry run" || echo "Live run")"
echo "Max models: $([ "$MAX_MODELS" = "0" ] && echo "No limit" || echo "$MAX_MODELS")"
fi
- name: No new models found
if: steps.find-new-models.outputs.new_count == 0
run: |
echo "No new models found. All OpenRouter models are either tested or in the untested list."
echo "Current status:"
echo "- Tested models: $(node -e "const fs = require('fs'); const config = JSON.parse(fs.readFileSync('benchmark-config.json', 'utf8')); let count = 0; for (const data of Object.values(config.providers)) { count += data.models.length; } console.log(count);")"
echo "- Untested models: $(node -e "const fs = require('fs'); const data = JSON.parse(fs.readFileSync('untested-models.json', 'utf8')); console.log(data.total_count);")"