diff --git a/.github/workflows/update-codeowners.yml b/.github/workflows/update-codeowners.yml
new file mode 100644
index 0000000000..2a7dfcba9a
--- /dev/null
+++ b/.github/workflows/update-codeowners.yml
@@ -0,0 +1,118 @@
+# Regenerate .github/CODEOWNERS from git commit history and open a pull
+# request whenever the generated file differs from the committed one.
+name: Update CODEOWNERS
+
+on:
+  schedule:
+    # Run weekly on Monday at 00:00 UTC
+    - cron: '0 0 * * 1'
+  workflow_dispatch:  # Allow manual triggering
+  # NOTE(Zihao): debugging only, remove later
+  pull_request:
+    branches:
+      - main
+
+permissions:
+  contents: write
+  pull-requests: write
+
+jobs:
+  update-codeowners:
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4.2.2
+        with:
+          fetch-depth: 0  # Fetch full history for accurate analysis
+          token: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Run CODEOWNERS analyzer
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          # Keep these values in sync with the commit message and PR body below.
+          python scripts/codeowner_analyzer.py \
+            --output .github/CODEOWNERS \
+            --depth 3 \
+            --min-commits 1 \
+            --days-back 180 \
+            --top-n 5 \
+            --no-api
+
+      - name: Check for changes
+        id: check_changes
+        run: |
+          # Check if CODEOWNERS file is new (untracked) or has changes
+          if git ls-files --error-unmatch .github/CODEOWNERS >/dev/null 2>&1; then
+            # File is tracked, check for changes
+            if git diff --quiet .github/CODEOWNERS; then
+              echo "changed=false" >> "$GITHUB_OUTPUT"
+              echo "No changes detected in CODEOWNERS"
+            else
+              echo "changed=true" >> "$GITHUB_OUTPUT"
+              echo "Changes detected in CODEOWNERS"
+            fi
+          else
+            # File is untracked (newly created)
+            echo "changed=true" >> "$GITHUB_OUTPUT"
+            echo "CODEOWNERS file is new"
+          fi
+
+      - name: Create Pull Request
+        if: steps.check_changes.outputs.changed == 'true'
+        uses: peter-evans/create-pull-request@v7
+        with:
+          token: ${{ secrets.FLASHINFER_GITHUB_TOKEN }}
+          commit-message: |
+            chore: update CODEOWNERS based on git history
+
+            Auto-generated CODEOWNERS update based on commit activity over the last 180 days.
+
+            🤖 Generated with [Claude Code](https://claude.com/claude-code)
+
+            Co-Authored-By: Claude
+          branch: auto-update-codeowners
+          base: main
+          delete-branch: true
+          title: 'chore: Update CODEOWNERS'
+          body: |
+            ## Summary
+
+            This PR updates the CODEOWNERS file based on git commit history analysis from the last 180 days.
+
+            ## Changes
+
+            - Updated `.github/CODEOWNERS` with current code ownership based on:
+              - Commit frequency
+              - File coverage
+              - Commit recency
+
+            ## How to Review
+
+            1. Review the changes to `.github/CODEOWNERS`
+            2. Verify that the assigned owners are appropriate for each module
+            3. Make manual adjustments if needed before merging
+
+            ## Notes
+
+            - This is an automated PR generated weekly
+            - Minimum commits threshold: 1
+            - Analysis period: 180 days
+            - Directory depth: 3 levels
+            - Top owners per module: up to 5
+
+            ---
+
+            🤖 This PR was automatically generated by the [update-codeowners workflow](.github/workflows/update-codeowners.yml)
+          labels: |
+            automated
+            maintenance
+          assignees: |
+
+          reviewers: |
diff --git a/scripts/codeowner_analyzer.py b/scripts/codeowner_analyzer.py
index 8018244fd2..b18ec0a807 100644
--- a/scripts/codeowner_analyzer.py
+++ b/scripts/codeowner_analyzer.py
@@ -31,6 +31,8 @@ def __init__(
         github_token: Optional[str] = None,
         use_api: bool = True,
         allowed_users: Optional[List[str]] = None,
+        max_depth: int = 3,
+        top_n_owners: int = 3,
     ):
         """
         Initialize the code owners analyzer.
@@ -43,10 +45,14 @@ def __init__( github_token: Optional GitHub API token for higher rate limits use_api: Whether to use GitHub API for email lookups (default: True) allowed_users: Optional list of GitHub usernames to include (filters out others) + max_depth: Maximum directory depth for module detection (default: 3) + top_n_owners: Number of top owners to include in CODEOWNERS file (default: 3) """ self.repo_path = Path(repo_path).resolve() self.min_commits = min_commits self.days_back = days_back + self.max_depth = max_depth + self.top_n_owners = top_n_owners self.module_owners: DefaultDict[str, DefaultDict[str, int]] = defaultdict( lambda: defaultdict(int) ) @@ -439,8 +445,10 @@ def get_modules(self) -> List[str]: if file_ext in relevant_extensions: # Add the directory and all parent directories as modules + # Limited by max_depth path_parts = Path(dir_path).parts - for i in range(1, len(path_parts) + 1): + max_parts = min(len(path_parts), self.max_depth) + for i in range(1, max_parts + 1): module = "/".join(path_parts[:i]) if not self.should_exclude(module): modules.add(module) @@ -654,10 +662,10 @@ def generate_codeowners_file( for module, data in results.items(): if data["owners"]: - # Take top 3 owners or those with ownership score > 0.1 + # Take top N owners or those with ownership score > 0.1 top_owners = [ owner - for owner in data["owners"][:3] + for owner in data["owners"][: self.top_n_owners] if owner["ownership_score"] > 0.1 ] @@ -773,6 +781,18 @@ def main() -> int: "--allowed-users-file", help="File containing allowed GitHub usernames, one per line", ) + parser.add_argument( + "--depth", + type=int, + default=3, + help="Maximum directory depth for module detection (default: 3)", + ) + parser.add_argument( + "--top-n", + type=int, + default=3, + help="Number of top owners to include in CODEOWNERS file (default: 3)", + ) args = parser.parse_args() @@ -811,6 +831,8 @@ def main() -> int: github_token=args.github_token, use_api=not args.no_api, 
allowed_users=allowed_users, + max_depth=args.depth, + top_n_owners=args.top_n, ) except ValueError as e: print(f"Error: {e}", file=sys.stderr)