facebook
diff --git a/‎.github/workflows/mypy_primer_classify.yml‎
Lines changed: 87 additions & 0 deletions b/‎.github/workflows/mypy_primer_classify.yml‎
Lines changed: 87 additions & 0 deletions
diff --git a/‎scripts/primer_classifier/__init__.py‎
Lines changed: 6 additions & 0 deletions b/‎scripts/primer_classifier/__init__.py‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎scripts/primer_classifier/__main__.py‎
Lines changed: 115 additions & 0 deletions b/‎scripts/primer_classifier/__main__.py‎
Lines changed: 115 additions & 0 deletions
@@ -0,0 +1,87 @@
+# Manually dispatched workflow: downloads the "mypy_primer_diffs" artifact of
+# a given workflow run, runs scripts/primer_classifier on the concatenated
+# diff, and posts the resulting markdown as a comment on the pull request
+# whose number is stored in the artifact.
+name: Classify mypy_primer diff
+
+on:
+  workflow_dispatch:
+    inputs:
+      primer_run_id:
+        description: "Run ID of the completed 'Run mypy_primer' workflow"
+        required: true
+        type: string
+
+# No token permissions at workflow level; the job below requests what it needs.
+permissions: {}
+
+jobs:
+  classify:
+    name: Classify primer diff
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      pull-requests: write
+    if: ${{ inputs.primer_run_id != '' }}
+    steps:
+      - uses: actions/checkout@v6
+        with:
+          sparse-checkout: scripts/primer_classifier
+          persist-credentials: false
+
+      - uses: actions/setup-python@v6
+        with:
+          python-version: "3.13"
+
+      - name: Download diffs
+        uses: actions/github-script@v8
+        env:
+          # Hand the input over as data instead of interpolating it into the
+          # script source, so it can never be evaluated as JavaScript.
+          PRIMER_RUN_ID: ${{ inputs.primer_run_id }}
+        with:
+          script: |
+            const fs = require('fs');
+            const runId = Number(process.env.PRIMER_RUN_ID);
+            if (!Number.isSafeInteger(runId) || runId <= 0) {
+              core.setFailed(`primer_run_id must be a positive integer, got: ${process.env.PRIMER_RUN_ID}`);
+              return;
+            }
+            const artifacts = await github.rest.actions.listWorkflowRunArtifacts({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              run_id: runId,
+            });
+            const matchArtifact = artifacts.data.artifacts.find((artifact) =>
+              artifact.name === "mypy_primer_diffs");
+            // Fail with a readable message rather than a TypeError on `.id`.
+            if (!matchArtifact) {
+              core.setFailed(`Run ${runId} has no "mypy_primer_diffs" artifact`);
+              return;
+            }
+
+            const download = await github.rest.actions.downloadArtifact({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              artifact_id: matchArtifact.id,
+              archive_format: "zip",
+            });
+            fs.writeFileSync("diff.zip", Buffer.from(download.data));
+
+      # The artifact is produced by another workflow run, so treat its contents
+      # as untrusted: extract into a dedicated directory so nothing from the
+      # archive lands in the workspace root, where `python -m` would pick it up
+      # ahead of the real modules.
+      - run: unzip -d primer_artifact diff.zip
+      - run: cat primer_artifact/diff_*.txt | tee fulldiff.txt
+
+      - name: Classify diff
+        # The step's status is that of `tee` (no `pipefail` is set here), so a
+        # non-zero exit from the classifier -- it returns 1 when regressions
+        # are found -- does not prevent the comment below from being posted.
+        run: |
+          python -m scripts.primer_classifier \
+            --diff-file fulldiff.txt \
+            --output-format markdown \
+            | tee classification.md
+        env:
+          LLAMA_API_KEY: ${{ secrets.LLAMA_API_KEY }}
+          CLASSIFIER_API_KEY: ${{ secrets.PRIMER_CLASSIFIER_API_KEY }}
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Read PR number
+        id: pr-number
+        run: |
+          pr_number="$(cat primer_artifact/pr_number.txt)"
+          # Only digits may reach $GITHUB_OUTPUT; anything else (including
+          # embedded newlines) could inject additional outputs.
+          if ! [[ "$pr_number" =~ ^[0-9]+$ ]]; then
+            echo "::error::pr_number.txt does not contain a valid PR number"
+            exit 1
+          fi
+          echo "pr_number=$pr_number" >> "$GITHUB_OUTPUT"
+
+      - name: Post classification comment
+        if: ${{ hashFiles('classification.md') != '' }}
+        uses: actions/github-script@v8
+        env:
+          # Validated by the "Read PR number" step above.
+          PR_NUMBER: ${{ steps.pr-number.outputs.pr_number }}
+        with:
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          script: |
+            const fs = require('fs')
+            const body = fs.readFileSync('classification.md', { encoding: 'utf8' })
+            // Skip the comment entirely when the classifier produced no output.
+            if (body.trim()) {
+              const prNumber = parseInt(process.env.PR_NUMBER, 10)
+              await github.rest.issues.createComment({
+                issue_number: prNumber,
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                body
+              })
+            }
@@ -0,0 +1,6 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""Classifier for mypy_primer diff output on pyrefly PRs."""
@@ -0,0 +1,115 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""CLI entry point for the primer classifier.
+
+Usage:
+    python -m scripts.primer_classifier --diff-file path/to/diff.txt [options]
+
+Options:
+    --diff-file FILE     Path to the primer diff text file (required)
+    --dry-run            Parse and apply heuristics only, skip LLM classification
+    --fetch-code         Fetch source code from GitHub (default: True with LLM)
+    --no-fetch-code      Skip fetching source code
+    --output-format FMT  Output format: "json" or "markdown" (default: markdown)
+    --model MODEL        LLM model to use (default: claude-sonnet-4-20250514)
+"""
+
+from __future__ import annotations
+
+import argparse
+import sys
+
+from .classifier import classify_all
+from .formatter import format_json, format_markdown
+from .parser import parse_primer_diff
+
+
+def main() -> int:
+    parser = argparse.ArgumentParser(
+        prog="primer_classifier",
+        description="Classify mypy_primer diff output for pyrefly PRs",
+    )
+    parser.add_argument(
+        "--diff-file",
+        required=True,
+        help="Path to the primer diff text file",
+    )
+    parser.add_argument(
+        "--dry-run",
+        action="store_true",
+        help="Parse and apply heuristics only, skip LLM classification",
+    )
+    parser.add_argument(
+        "--fetch-code",
+        action=argparse.BooleanOptionalAction,
+        default=None,
+        help="Fetch source code from GitHub (default: enabled when using LLM)",
+    )
+    parser.add_argument(
+        "--output-format",
+        choices=["json", "markdown"],
+        default="markdown",
+        help="Output format (default: markdown)",
+    )
+    parser.add_argument(
+        "--model",
+        default=None,
+        help="LLM model to use (default: claude-sonnet-4-20250514)",
+    )
+
+    args = parser.parse_args()
+
+    # Read the diff file
+    try:
+        with open(args.diff_file) as f:
+            diff_text = f.read()
+    except FileNotFoundError:
+        print(f"Error: file not found: {args.diff_file}", file=sys.stderr)
+        return 1
+    except OSError as e:
+        print(f"Error reading file: {e}", file=sys.stderr)
+        return 1
+
+    # Parse
+    projects = parse_primer_diff(diff_text)
+    if not projects:
+        if args.output_format == "json":
+            print('{"summary": {"total_projects": 0}, "classifications": []}')
+        else:
+            print("No diffs to classify.")
+        return 0
+
+    print(
+        f"Parsed {len(projects)} project(s) from diff",
+        file=sys.stderr,
+    )
+
+    # Determine fetch_code setting
+    use_llm = not args.dry_run
+    if args.fetch_code is None:
+        fetch_code = use_llm  # fetch code when using LLM
+    else:
+        fetch_code = args.fetch_code
+
+    # Classify
+    result = classify_all(
+        projects,
+        fetch_code=fetch_code,
+        use_llm=use_llm,
+    )
+
+    # Output
+    if args.output_format == "json":
+        print(format_json(result))
+    else:
+        print(format_markdown(result))
+
+    # Return non-zero if there are regressions (useful for CI)
+    return 1 if result.regressions > 0 else 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())