# Validates a leaderboard submission opened as a pull request.
#
# For the first changed `submissions/**/*-assessment.json` file the job:
#   1. extracts and sanity-checks the claimed repository URL and score,
#   2. validates the report against the schema (`agentready validate-report`),
#   3. checks that the repository is public and that the submitter has access,
#   4. warns when the repository was already submitted within the last 24 hours,
#   5. re-runs the assessment and compares the score (tolerance: ±2 points),
#   6. posts a summary comment on the pull request.
name: Validate Leaderboard Submission

on:
  pull_request:
    paths:
      - 'submissions/**/*-assessment.json'

# Least privilege: read the repository, write the summary comment.
# Runs triggered from forks still receive a read-only token, which is why the
# comment step below degrades to a warning instead of failing the job.
permissions:
  contents: read
  pull-requests: write

jobs:
  validate:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout PR branch
        uses: actions/checkout@v4
        with:
          ref: ${{ github.event.pull_request.head.sha }}
          # Later steps install code from the PR and assess a third-party
          # repository; do not leave the token behind in .git/config.
          persist-credentials: false

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'

      - name: Install dependencies
        run: |
          pip install uv
          uv venv
          source .venv/bin/activate
          uv pip install -e .

      - name: Extract submission details
        id: extract
        env:
          GH_TOKEN: ${{ github.token }}
          PR_NUMBER: ${{ github.event.pull_request.number }}
        run: |
          # Ask the API which files the PR touches. The checkout above is a
          # shallow clone of the head commit only, so the base branch is not
          # available locally for a `git diff`.
          CHANGED_PATHS=$(
            gh api --paginate "repos/$GITHUB_REPOSITORY/pulls/$PR_NUMBER/files" \
              --jq '.[] | select(.status != "removed") | .filename'
          )

          # `|| true`: the default shell runs with `-e -o pipefail`, so a
          # non-matching grep would otherwise abort before the message below.
          CHANGED_FILE=$(
            printf '%s\n' "$CHANGED_PATHS" \
              | grep -E '^submissions/.+-assessment\.json$' \
              | head -n 1 || true
          )

          if [ -z "$CHANGED_FILE" ] || [ ! -f "$CHANGED_FILE" ]; then
            echo "No assessment file found in diff"
            exit 1
          fi

          # Parse JSON
          REPO_URL=$(jq -r '.repository.url // empty' "$CHANGED_FILE")
          CLAIMED_SCORE=$(jq -r '.overall_score // empty' "$CHANGED_FILE")
          # Strip line breaks so the value cannot inject extra step outputs.
          REPO_NAME=$(jq -r '.repository.name // empty' "$CHANGED_FILE" | tr -d '\r\n')

          # The submitted JSON is untrusted: these values end up in API paths,
          # `git clone` and arithmetic, so reject anything unexpected here.
          URL_PATTERN='^https://github\.com/[A-Za-z0-9_.-]+/[A-Za-z0-9_.-]+/?$'
          if [[ ! "$REPO_URL" =~ $URL_PATTERN ]]; then
            echo "::error::repository.url must look like https://github.com/<owner>/<repo>"
            exit 1
          fi

          SCORE_PATTERN='^[0-9]+(\.[0-9]+)?$'
          if [[ ! "$CLAIMED_SCORE" =~ $SCORE_PATTERN ]]; then
            echo "::error::overall_score must be a non-negative number"
            exit 1
          fi

          # "<owner>/<repo>" form, derived once and shared with later steps.
          ORG_REPO="${REPO_URL#https://github.com/}"
          ORG_REPO="${ORG_REPO%/}"
          ORG_REPO="${ORG_REPO%.git}"

          {
            echo "file=$CHANGED_FILE"
            echo "repo_url=$REPO_URL"
            echo "org_repo=$ORG_REPO"
            echo "claimed_score=$CLAIMED_SCORE"
            echo "repo_name=$REPO_NAME"
          } >> "$GITHUB_OUTPUT"

      - name: Validate JSON schema
        id: schema
        env:
          ASSESSMENT_FILE: ${{ steps.extract.outputs.file }}
        run: |
          source .venv/bin/activate
          agentready validate-report "$ASSESSMENT_FILE"

      - name: Verify repository exists and is public
        id: visibility
        env:
          GH_TOKEN: ${{ github.token }}
          ORG_REPO: ${{ steps.extract.outputs.org_repo }}
        run: |
          # Check if repo exists and is public
          IS_PRIVATE=$(gh repo view "$ORG_REPO" --json isPrivate -q '.isPrivate') || {
            echo "::error::Repository $ORG_REPO could not be found."
            exit 1
          }

          if [ "$IS_PRIVATE" == "true" ]; then
            echo "::error::Repository $ORG_REPO is private. Only public repositories can be submitted to the leaderboard."
            exit 1
          fi

          echo "✅ Repository $ORG_REPO is public"

      - name: Verify submitter has access
        id: access
        env:
          GH_TOKEN: ${{ github.token }}
          ORG_REPO: ${{ steps.extract.outputs.org_repo }}
          SUBMITTER: ${{ github.event.pull_request.user.login }}
        run: |
          # Check if submitter is collaborator or owner.
          # NOTE(review): the collaborators endpoint appears to require a token
          # with push access to the *target* repository; with the workflow token
          # this branch presumably only succeeds for repositories it can manage,
          # leaving the owner comparison as the effective check - confirm.
          if gh api --silent "repos/$ORG_REPO/collaborators/$SUBMITTER" 2>/dev/null; then
            echo "✅ $SUBMITTER is a collaborator on $ORG_REPO"
          elif [ "$(gh api "repos/$ORG_REPO" -q '.owner.login')" == "$SUBMITTER" ]; then
            echo "✅ $SUBMITTER is the owner of $ORG_REPO"
          else
            echo "::error::$SUBMITTER does not have commit access to $ORG_REPO"
            exit 1
          fi

      - name: Check rate limiting
        env:
          GH_TOKEN: ${{ github.token }}
          ORG_REPO: ${{ steps.extract.outputs.org_repo }}
          BASE_REF: ${{ github.event.pull_request.base.ref }}
        run: |
          SUBMISSIONS_DIR="submissions/$ORG_REPO"

          # Count commits on the base branch that touched this repository's
          # submissions directory in the last 24 hours. File mtimes cannot be
          # used for this: every file in a fresh checkout is "new".
          CUTOFF=$(date -u -d '24 hours ago' +%Y-%m-%dT%H:%M:%SZ)
          RECENT_COUNT=$(
            gh api --method GET "repos/$GITHUB_REPOSITORY/commits" \
              -f sha="$BASE_REF" \
              -f path="$SUBMISSIONS_DIR" \
              -f since="$CUTOFF" \
              --jq 'length' || echo 0
          )

          # Advisory only: never fails the job.
          if [ "${RECENT_COUNT:-0}" -ge 1 ]; then
            echo "::warning::Repository $ORG_REPO has $RECENT_COUNT submission(s) in the last 24 hours. Maximum is 1 per 24 hours."
            echo "This is a warning only - manual review recommended."
          fi

      - name: Re-run assessment
        id: assess
        env:
          REPO_URL: ${{ steps.extract.outputs.repo_url }}
        run: |
          source .venv/bin/activate

          # Clone repository to temporary directory
          echo "Cloning $REPO_URL..."
          git clone -- "$REPO_URL" /tmp/repo-to-assess

          # Run assessment
          echo "Running assessment..."
          agentready assess /tmp/repo-to-assess --output-dir /tmp/validation

          # Extract actual score; refuse to continue with a missing or
          # non-numeric value so the comparison below never sees "null".
          ACTUAL_SCORE=$(jq -r '.overall_score // empty' /tmp/validation/assessment-latest.json)
          SCORE_PATTERN='^[0-9]+(\.[0-9]+)?$'
          if [[ ! "$ACTUAL_SCORE" =~ $SCORE_PATTERN ]]; then
            echo "::error::Assessment did not produce a numeric overall_score"
            exit 1
          fi

          echo "ACTUAL_SCORE=$ACTUAL_SCORE" >> "$GITHUB_ENV"
          echo "Actual score: $ACTUAL_SCORE"

      - name: Compare scores
        id: compare
        env:
          CLAIMED_SCORE: ${{ steps.extract.outputs.claimed_score }}
        run: |
          CLAIMED=$CLAIMED_SCORE
          ACTUAL=$ACTUAL_SCORE

          # Absolute difference and verdict in one awk call (no dependency on
          # bc). The small epsilon keeps binary floating-point noise from
          # turning an exact 2-point difference into a failure.
          read -r DIFF EXCEEDS < <(
            awk -v actual="$ACTUAL" -v claimed="$CLAIMED" 'BEGIN {
              delta = actual - claimed
              if (delta < 0) delta = -delta
              printf "%.2f %d\n", delta, (delta > 2 + 1e-9)
            }'
          )

          echo "Claimed score: $CLAIMED"
          echo "Actual score: $ACTUAL"
          echo "Difference: $DIFF"

          # Allow ±2 point tolerance
          if [ "$EXCEEDS" -eq 1 ]; then
            echo "::error::Score mismatch: claimed $CLAIMED, actual $ACTUAL (diff: $DIFF points)"
            echo "Maximum tolerance is ±2 points."
            exit 1
          fi

          echo "::notice::✅ Score validated: $ACTUAL (claimed: $CLAIMED, diff: $DIFF points)"

      - name: Comment on PR
        if: always()
        uses: actions/github-script@v7
        env:
          CLAIMED_SCORE: ${{ steps.extract.outputs.claimed_score }}
          ACTUAL_SCORE: ${{ env.ACTUAL_SCORE }}
          JOB_STATUS: ${{ job.status }}
          SCHEMA_OUTCOME: ${{ steps.schema.outcome }}
          VISIBILITY_OUTCOME: ${{ steps.visibility.outcome }}
          ACCESS_OUTCOME: ${{ steps.access.outcome }}
          COMPARE_OUTCOME: ${{ steps.compare.outcome }}
        with:
          script: |
            // `||` on purpose: unset outputs arrive as empty strings.
            const claimed = process.env.CLAIMED_SCORE || 'N/A';
            const actual = process.env.ACTUAL_SCORE || 'N/A';

            const claimedValue = Number.parseFloat(claimed);
            const actualValue = Number.parseFloat(actual);
            const hasBothScores =
              Number.isFinite(claimedValue) && Number.isFinite(actualValue);
            const diff = hasBothScores
              ? Math.abs(actualValue - claimedValue).toFixed(2)
              : 'N/A';

            // Report what the steps actually did instead of assuming success;
            // steps that never ran (an earlier one failed) are shown as skipped.
            const icon = (outcome) =>
              ({ success: '✅', failure: '❌' })[outcome] ?? '⏭️';

            const passed = process.env.JOB_STATUS === 'success';
            const status = passed ? '✅ **PASSED**' : '❌ **FAILED**';

            let message = 'Validation checks failed. See workflow logs for details.';
            if (passed) {
              message = 'All validation checks successful! Ready for review and merge.';
            } else if (process.env.COMPARE_OUTCOME === 'failure') {
              message = 'Score mismatch exceeds tolerance (±2 points).';
            }

            // Built from an array so that no line of the comment has to sit at
            // column 0 inside this YAML block scalar.
            const body = [
              '## Validation Results',
              '',
              `${status} - ${message}`,
              '',
              '### Scores',
              '',
              `- **Claimed**: ${claimed}/100`,
              `- **Verified**: ${actual}/100`,
              `- **Difference**: ${diff} points`,
              '- **Tolerance**: ±2 points',
              '',
              '### Checks',
              '',
              `- ${icon(process.env.SCHEMA_OUTCOME)} JSON schema valid`,
              `- ${icon(process.env.VISIBILITY_OUTCOME)} Repository is public`,
              `- ${icon(process.env.ACCESS_OUTCOME)} Submitter has commit access`,
              `- ${icon(process.env.COMPARE_OUTCOME)} Score verification passed`,
              '',
              passed
                ? '**This submission is ready for merge!**'
                : '**Please review the workflow logs for error details.**',
              '',
              '---',
              '',
              '*Automated validation by [AgentReady Leaderboard](https://github.com/agentready/agentready)*',
            ].join('\n');

            try {
              await github.rest.issues.createComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                issue_number: context.issue.number,
                body,
              });
            } catch (error) {
              // A read-only token (e.g. a PR from a fork) cannot comment; the
              // validation verdict is still carried by the job status.
              core.warning(`Could not post validation comment: ${error.message}`);
            }