Skip to content

Commit 5aabb05

Browse files
committed
Add mypy_primer diff classifier
Adds a classifier that automatically categorizes mypy_primer diff output into regressions, improvements, and neutral changes. Uses heuristics for trivial cases and an LLM for non-trivial diffs. Includes a manual-trigger GitHub Actions workflow.
1 parent c5ae41d commit 5aabb05

File tree

8 files changed

+1341
-0
lines changed

8 files changed

+1341
-0
lines changed
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
# Manually triggered workflow: downloads the "mypy_primer_diffs" artifact of a
# finished "Run mypy_primer" run, classifies the diff with
# scripts/primer_classifier, and posts the result as a comment on the PR whose
# number is stored in the artifact's pr_number.txt.
name: Classify mypy_primer diff

on:
  workflow_dispatch:
    inputs:
      primer_run_id:
        description: "Run ID of the completed 'Run mypy_primer' workflow"
        required: true
        type: string

# Deny everything at workflow level; the job opts in to what it needs.
permissions: {}

jobs:
  classify:
    name: Classify primer diff
    runs-on: ubuntu-latest
    permissions:
      contents: read
      pull-requests: write
    if: ${{ inputs.primer_run_id != '' }}
    steps:
      - uses: actions/checkout@v6
        with:
          sparse-checkout: scripts/primer_classifier
          persist-credentials: false

      - uses: actions/setup-python@v6
        with:
          python-version: "3.13"

      - name: Download diffs
        uses: actions/github-script@v8
        env:
          # Pass the input as data through the environment instead of
          # interpolating it into the script source, so it can never be
          # evaluated as JavaScript.
          PRIMER_RUN_ID: ${{ inputs.primer_run_id }}
        with:
          script: |
            const fs = require('fs');

            const rawRunId = (process.env.PRIMER_RUN_ID ?? '').trim();
            if (!/^\d+$/.test(rawRunId)) {
              throw new Error(`primer_run_id must be a numeric run ID, got "${rawRunId}"`);
            }

            const artifacts = await github.rest.actions.listWorkflowRunArtifacts({
              owner: context.repo.owner,
              repo: context.repo.repo,
              run_id: Number(rawRunId),
            });
            const matchArtifact = artifacts.data.artifacts.find(
              (artifact) => artifact.name === "mypy_primer_diffs",
            );
            // Fail with a readable message rather than a TypeError on `.id`.
            if (!matchArtifact) {
              throw new Error(`Run ${rawRunId} has no "mypy_primer_diffs" artifact`);
            }

            const download = await github.rest.actions.downloadArtifact({
              owner: context.repo.owner,
              repo: context.repo.repo,
              artifact_id: matchArtifact.id,
              archive_format: "zip",
            });
            fs.writeFileSync("diff.zip", Buffer.from(download.data));

      # NOTE(review): the archive is unpacked into the same workspace that
      # holds the checked-out classifier; consider extracting into a scratch
      # directory if the artifact can originate from an untrusted PR.
      - run: unzip diff.zip
      - run: cat diff_*.txt | tee fulldiff.txt

      - name: Classify diff
        # No `shell:` is set, so this runs without `pipefail`: the step's
        # status is that of `tee`, not of the classifier. The classifier's
        # non-zero exit on regressions therefore does not fail the job, and
        # the comment step below still runs.
        run: |
          python -m scripts.primer_classifier \
            --diff-file fulldiff.txt \
            --output-format markdown \
            | tee classification.md
        env:
          LLAMA_API_KEY: ${{ secrets.LLAMA_API_KEY }}
          CLASSIFIER_API_KEY: ${{ secrets.PRIMER_CLASSIFIER_API_KEY }}
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      - name: Read PR number
        id: pr-number
        run: |
          echo "pr_number=$(cat pr_number.txt)" >> "$GITHUB_OUTPUT"

      - name: Post classification comment
        if: ${{ hashFiles('classification.md') != '' }}
        uses: actions/github-script@v8
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
            const fs = require('fs');

            const body = fs.readFileSync('classification.md', { encoding: 'utf8' });
            // Nothing to post when the classifier produced no output.
            if (body.trim()) {
              const prNumber = Number.parseInt(
                fs.readFileSync("pr_number.txt", { encoding: "utf8" }),
                10,
              );
              // Refuse to call the API with NaN or a nonsensical issue number.
              if (!Number.isInteger(prNumber) || prNumber <= 0) {
                throw new Error("pr_number.txt does not contain a valid pull request number");
              }
              await github.rest.issues.createComment({
                issue_number: prNumber,
                owner: context.repo.owner,
                repo: context.repo.repo,
                body,
              });
            }
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# Copyright (c) Meta Platforms, Inc. and affiliates.
2+
#
3+
# This source code is licensed under the MIT license found in the
4+
# LICENSE file in the root directory of this source tree.
5+
6+
"""Classifier for mypy_primer diff output on pyrefly PRs."""
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
# Copyright (c) Meta Platforms, Inc. and affiliates.
2+
#
3+
# This source code is licensed under the MIT license found in the
4+
# LICENSE file in the root directory of this source tree.
5+
6+
"""CLI entry point for the primer classifier.
7+
8+
Usage:
9+
python -m scripts.primer_classifier --diff-file path/to/diff.txt [options]
10+
11+
Options:
12+
--diff-file FILE Path to the primer diff text file (required)
13+
--dry-run Parse and apply heuristics only, skip LLM classification
14+
--fetch-code Fetch source code from GitHub (default: True with LLM)
15+
--no-fetch-code Skip fetching source code
16+
--output-format FMT Output format: "json" or "markdown" (default: markdown)
17+
--model MODEL LLM model to use (default: claude-sonnet-4-20250514)
18+
"""
19+
20+
from __future__ import annotations
21+
22+
import argparse
23+
import sys
24+
25+
from .classifier import classify_all
26+
from .formatter import format_json, format_markdown
27+
from .parser import parse_primer_diff
28+
29+
30+
def _build_arg_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the primer classifier CLI."""
    parser = argparse.ArgumentParser(
        prog="primer_classifier",
        description="Classify mypy_primer diff output for pyrefly PRs",
    )
    parser.add_argument(
        "--diff-file",
        required=True,
        help="Path to the primer diff text file",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Parse and apply heuristics only, skip LLM classification",
    )
    parser.add_argument(
        "--fetch-code",
        action=argparse.BooleanOptionalAction,
        # None means "not specified": main() then derives the value from
        # whether the LLM is in use.
        default=None,
        help="Fetch source code from GitHub (default: enabled when using LLM)",
    )
    parser.add_argument(
        "--output-format",
        choices=["json", "markdown"],
        default="markdown",
        help="Output format (default: markdown)",
    )
    parser.add_argument(
        "--model",
        default=None,
        help="LLM model to use (default: claude-sonnet-4-20250514)",
    )
    return parser


def main() -> int:
    """Run the classifier CLI.

    Reads the diff file named by ``--diff-file``, parses it into projects,
    classifies them, and prints the result to stdout as JSON or markdown.
    Progress and error messages go to stderr so stdout stays clean for
    redirection.

    Returns:
        0 when there is nothing to classify or no regressions were found;
        1 when the diff file cannot be read or at least one regression was
        reported.
    """
    args = _build_arg_parser().parse_args()

    if args.model is not None:
        # Nothing below forwards the value, so say so instead of silently
        # ignoring the flag.
        # NOTE(review): forward it to classify_all once its signature is
        # confirmed to accept a model argument.
        print(
            "Warning: --model is not forwarded to the classifier and has no effect",
            file=sys.stderr,
        )

    # Read the diff file. Decode explicitly as UTF-8 and replace undecodable
    # bytes: the text is aggregated tool output, and one stray byte should
    # not abort the run with an uncaught UnicodeDecodeError.
    try:
        with open(args.diff_file, encoding="utf-8", errors="replace") as f:
            diff_text = f.read()
    except FileNotFoundError:
        print(f"Error: file not found: {args.diff_file}", file=sys.stderr)
        return 1
    except OSError as e:
        print(f"Error reading file: {e}", file=sys.stderr)
        return 1

    # Parse
    projects = parse_primer_diff(diff_text)
    if not projects:
        if args.output_format == "json":
            print('{"summary": {"total_projects": 0}, "classifications": []}')
        else:
            print("No diffs to classify.")
        return 0

    print(
        f"Parsed {len(projects)} project(s) from diff",
        file=sys.stderr,
    )

    # Determine fetch_code setting: an explicit --fetch-code/--no-fetch-code
    # wins; otherwise fetch only when the LLM is going to be used.
    use_llm = not args.dry_run
    fetch_code = use_llm if args.fetch_code is None else args.fetch_code

    # Classify
    result = classify_all(
        projects,
        fetch_code=fetch_code,
        use_llm=use_llm,
    )

    # Output
    if args.output_format == "json":
        print(format_json(result))
    else:
        print(format_markdown(result))

    # Return non-zero if there are regressions (useful for CI)
    return 1 if result.regressions > 0 else 0
112+
113+
114+
# Script entry point: use main()'s return value as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())

0 commit comments

Comments
 (0)