diff --git a/.github/scripts/stale_component_handler/README.md b/.github/scripts/stale_component_handler/README.md new file mode 100644 index 00000000..898ea346 --- /dev/null +++ b/.github/scripts/stale_component_handler/README.md @@ -0,0 +1,38 @@ +# Stale Component Handler + +Handles components approaching or past their verification deadline. + +## What it Does + +1. Uses `scripts/check_component_freshness` to categorize components: + - 🟡 **Warning (270-360 days)**: Creates GitHub issues to notify owners + - 🔴 **Stale (>360 days)**: Creates PRs to remove the component + +2. For **warning** components: + - Creates issues with `stale-component` label + - Assigns component owners (up to 10) + - Includes instructions for verification + +3. For **stale** components: + - Creates a branch `remove-stale-{component-name}` + - Removes the component directory + - Regenerates the category README to update the index + - Opens a PR with `stale-component-removal` label + - Adds component owners as reviewers + +## Usage + +```bash +# Dry run (see what would be created) +uv run .github/scripts/stale_component_handler/stale_component_handler.py \ + --repo owner/repo --dry-run + +# Create issues and PRs +GITHUB_TOKEN=ghp_xxx uv run .github/scripts/stale_component_handler/stale_component_handler.py \ + --repo owner/repo +``` + +## Requirements + +- `gh` CLI (pre-installed on GitHub Actions runners) +- `GITHUB_TOKEN` with `issues: write`, `contents: write`, `pull-requests: write` permissions diff --git a/.github/scripts/stale_component_handler/__init__.py b/.github/scripts/stale_component_handler/__init__.py new file mode 100644 index 00000000..6d63b6cf --- /dev/null +++ b/.github/scripts/stale_component_handler/__init__.py @@ -0,0 +1 @@ +# Stale component handler module diff --git a/.github/scripts/stale_component_handler/issue_body.md.j2 b/.github/scripts/stale_component_handler/issue_body.md.j2 new file mode 100644 index 00000000..d69ff929 --- /dev/null +++ b/.github/scripts/stale_component_handler/issue_body.md.j2 @@ -0,0 +1,28 @@ +## Component Verification Required + +The component **{{ name }}** has not been verified in {{ age_days }} days and is approaching the staleness threshold. + +**If not verified within the next {{ 360 - age_days }} days, a PR will be created to remove this component.** + +| Field | Value | +|-------------------------|---------------------| +| Path | `{{ path }}` | +| Last Verified | {{ last_verified }} | +| Days Since Verification | {{ age_days }} | + +### Maintainers + +{{ owners_mention }} + +### How to Resolve + +1. Verify the component still works with the current KFP version +2. Run any associated tests +3. Update `lastVerified` in `{{ path }}/metadata.yaml`: + +```yaml +lastVerified: {{ today }} +``` + +4. Submit a PR with the updated metadata + diff --git a/.github/scripts/stale_component_handler/removal_pr_body.md.j2 b/.github/scripts/stale_component_handler/removal_pr_body.md.j2 new file mode 100644 index 00000000..d392ca79 --- /dev/null +++ b/.github/scripts/stale_component_handler/removal_pr_body.md.j2 @@ -0,0 +1,34 @@ +## Stale Component Removal + +This PR removes the component **{{ name }}** which has not been verified in {{ age_days }} days. + +| Field | Value | +|-------------------------|---------------------| +| Path | `{{ path }}` | +| Last Verified | {{ last_verified }} | +| Days Since Verification | {{ age_days }} | + +### Changes + +- Removes `{{ path }}/` directory +- Updates category README to remove component from index + +### Maintainers + +{{ owners_mention }} + +### Why This PR Was Created + +Components must be verified at least once per year to ensure they remain functional and compatible with current KFP versions. This component has exceeded the staleness threshold and no verification was performed. + +### Before Merging + +If this component should be kept: +1. Close this PR +2. Verify the component still works +3. Update `lastVerified` in `{{ path }}/metadata.yaml` +4. Submit a PR with the updated metadata + +If this component should be removed: +1. Review the changes +2. Approve and merge this PR diff --git a/.github/scripts/stale_component_handler/stale_component_handler.py b/.github/scripts/stale_component_handler/stale_component_handler.py new file mode 100755 index 00000000..adff3af5 --- /dev/null +++ b/.github/scripts/stale_component_handler/stale_component_handler.py @@ -0,0 +1,467 @@ +"""Handle stale components: create warning issues and removal PRs. + +- Warning (270-360 days): Creates GitHub issues to notify owners +- Stale (>360 days): Creates PRs to remove the component +""" + +import argparse +import json +import os +import re +import subprocess +import sys +from datetime import datetime, timezone +from pathlib import Path + +import requests +import yaml +from jinja2 import Environment, FileSystemLoader + +# utils module sets up sys.path and re-exports from scripts/lib/discovery +REPO_ROOT = Path(__file__).parent.parent.parent.parent +sys.path.append(str(REPO_ROOT)) + +from scripts.check_component_freshness.check_component_freshness import scan_repo # noqa: E402 +from scripts.generate_readme.category_index_generator import CategoryIndexGenerator # noqa: E402 + +ISSUE_LABEL = "stale-component" +REMOVAL_LABEL = "stale-component-removal" +TEMPLATE_DIR = Path(__file__).parent +ISSUE_BODY_TEMPLATE = "issue_body.md.j2" +REMOVAL_PR_BODY_TEMPLATE = "removal_pr_body.md.j2" + +# Maximum issues to check when looking for duplicates (GitHub API max is 100) +MAX_ISSUES_PER_PAGE = 100 +# GitHub API limits assignees to 10 per issue +MAX_ASSIGNEES = 10 + + +def sanitize_branch_name(name: str) -> str: + r"""Sanitize a string to be a valid git branch name. + + Git branch names cannot contain spaces, ~, ^, :, ?, *, [, \\, or + consecutive dots. They also cannot begin/end with dots or slashes. + """ + # Replace spaces and invalid characters with hyphens + sanitized = re.sub(r"[\s~^:?*\[\]\\@{}'\"]+", "-", name) + # Replace consecutive dots with a single dot + sanitized = re.sub(r"\.{2,}", ".", sanitized) + # Remove leading/trailing dots, slashes, and hyphens + sanitized = sanitized.strip(".-/") + # Collapse multiple consecutive hyphens into one + sanitized = re.sub(r"-{2,}", "-", sanitized) + # Convert to lowercase for consistency + sanitized = sanitized.lower() + return sanitized + + +def get_issue_title(component_name: str) -> str: + """Generate the standard issue title for a warning component.""" + return f"Component `{component_name}` needs verification" + + +def get_removal_pr_title(component_name: str) -> str: + """Generate the standard PR title for removing a stale component.""" + return f"chore: Remove stale component `{component_name}`" + + +def get_owners(component_path: Path) -> list[str]: + """Read OWNERS file for a component.""" + owners_file = component_path / "OWNERS" + if not owners_file.exists(): + return [] + try: + owners = yaml.safe_load(owners_file.read_text()) + return owners.get("approvers", []) if owners else [] + except Exception: + print(f"::warning:: Error: Could not read OWNERS file for component {component_path}. ", file=sys.stderr) + print(f"::warning:: No owners will be assigned to the issue for {component_path}.", file=sys.stderr) + return [] + + +def create_issue_body(component: dict, repo_path: Path) -> str: + """Generate the issue body with instructions using Jinja2 template.""" + env = Environment(loader=FileSystemLoader(TEMPLATE_DIR)) + template = env.get_template(ISSUE_BODY_TEMPLATE) + + owners = get_owners(repo_path / component["path"]) + owners_mention = ", ".join(f"@{o}" for o in owners) if owners else "No owners found" + + return template.render( + name=component["name"], + path=component["path"], + last_verified=component["last_verified"], + age_days=component["age_days"], + owners_mention=owners_mention, + today=datetime.now(timezone.utc).strftime("%Y-%m-%d"), + ) + + +def create_removal_pr_body(component: dict, owners: list[str]) -> str: + """Generate the PR body for component removal using Jinja2 template.""" + env = Environment(loader=FileSystemLoader(TEMPLATE_DIR)) + template = env.get_template(REMOVAL_PR_BODY_TEMPLATE) + + owners_mention = ", ".join(f"@{o}" for o in owners) if owners else "No owners found" + + return template.render( + name=component["name"], + path=component["path"], + last_verified=component["last_verified"], + age_days=component["age_days"], + owners_mention=owners_mention, + ) + + +def issue_exists(repo: str, component_name: str, token: str | None) -> bool: + """Check if an open issue already exists for this component.""" + expected_title = get_issue_title(component_name) + headers = {"Accept": "application/vnd.github.v3+json"} + if token: + headers["Authorization"] = f"token {token}" + try: + resp = requests.get( + f"https://api.github.com/repos/{repo}/issues", + headers=headers, + params={"state": "open", "labels": ISSUE_LABEL, "per_page": MAX_ISSUES_PER_PAGE}, + timeout=30, + ) + resp.raise_for_status() + return any(issue["title"] == expected_title for issue in resp.json()) + except Exception as e: + print(f"::error::Failed to check for existing issue: {e}", file=sys.stderr) + print( + "::error:: Cannot verify if issue already exists or not. Will assume it does to prevent " + "creating duplicate issues and skip this component.", + file=sys.stderr, + ) + return True + + +def removal_pr_exists(repo: str, component_name: str) -> bool: + """Check if an open PR already exists for removing this component.""" + expected_title = get_removal_pr_title(component_name) + try: + result = subprocess.run( + ["gh", "pr", "list", "--repo", repo, "--state", "open", "--json", "title"], + capture_output=True, + text=True, + check=True, + ) + prs = json.loads(result.stdout) + return any(pr["title"] == expected_title for pr in prs) + except subprocess.CalledProcessError as e: + print(f"::error::Failed to check for existing PR: {e}", file=sys.stderr) + print( + "::error:: Cannot verify if PR already exists or not. Will assume it does to prevent " + "creating duplicate PRs and skip this component.", + file=sys.stderr, + ) + return True + + +def create_issue(repo: str, component: dict, repo_path: Path, token: str | None, dry_run: bool) -> bool: + """Create a GitHub issue for a component needing verification.""" + title = get_issue_title(component["name"]) + owners = get_owners(repo_path / component["path"]) + assignees = owners[:MAX_ASSIGNEES] + + if dry_run: + print(f"[DRY RUN] Would create issue: {title}") + print(f" Assignees: {assignees}") + return True + + headers = {"Accept": "application/vnd.github.v3+json"} + if token: + headers["Authorization"] = f"token {token}" + body = create_issue_body(component, repo_path) + + try: + resp = requests.post( + f"https://api.github.com/repos/{repo}/issues", + headers=headers, + json={"title": title, "body": body, "labels": [ISSUE_LABEL], "assignees": assignees}, + timeout=30, + ) + resp.raise_for_status() + print(f"Created issue: {resp.json().get('html_url')}") + return True + except requests.exceptions.RequestException as e: + print(f"Failed to create issue for {component['name']}: {e}", file=sys.stderr) + return False + + +def get_current_branch() -> str | None: + """Get the current git branch name, or None if in detached HEAD state.""" + try: + result = subprocess.run( + ["git", "rev-parse", "--abbrev-ref", "HEAD"], + capture_output=True, + text=True, + check=True, + ) + branch = result.stdout.strip() + return None if branch == "HEAD" else branch + except subprocess.CalledProcessError: + return None + + +def create_removal_pr(repo: str, component: dict, repo_path: Path, dry_run: bool) -> bool: + """Create a PR to remove a stale component using gh CLI.""" + name = component["name"] + path = component["path"] + title = get_removal_pr_title(name) + branch_name = f"remove-stale-{sanitize_branch_name(name)}" + owners = get_owners(repo_path / path) + + if dry_run: + print(f"[DRY RUN] Would create removal PR: {title}") + print(f" Branch: {branch_name}") + print(f" Removes: {path}") + print(f" Updates: {Path(path).parent}/README.md (category index)") + print(f" Reviewers: {owners}") + return True + + # Save original branch to restore later + original_branch = get_current_branch() + + branch_pushed = False + try: + # Fetch latest from origin + subprocess.run(["git", "fetch", "origin"], check=True, capture_output=True) + + # Get default branch name + result = subprocess.run( + ["gh", "repo", "view", repo, "--json", "defaultBranchRef", "--jq", ".defaultBranchRef.name"], + capture_output=True, + text=True, + check=True, + ) + default_branch = result.stdout.strip() + + # Create and checkout new branch from default + subprocess.run( + ["git", "checkout", "-B", branch_name, f"origin/{default_branch}"], + check=True, + capture_output=True, + ) + + # Remove the component directory + component_dir = repo_path / path + if component_dir.exists(): + subprocess.run(["git", "rm", "-rf", str(component_dir)], check=True, capture_output=True) + else: + print(f"Component directory not found: {component_dir}", file=sys.stderr) + return False + + # Regenerate category README to remove the component from the index + category_dir = component_dir.parent + is_component = "components" in path + try: + index_generator = CategoryIndexGenerator(category_dir, is_component=is_component) + category_readme_content = index_generator.generate() + category_readme_path = category_dir / "README.md" + category_readme_path.write_text(category_readme_content) + subprocess.run(["git", "add", str(category_readme_path)], check=True, capture_output=True) + except Exception as e: + print(f"Warning: Could not regenerate category README: {e}", file=sys.stderr) + + # Commit the change + commit_msg = ( + f"Remove stale component: {name}\n\nComponent has not been verified in {component['age_days']} days." + ) + subprocess.run(["git", "commit", "-m", commit_msg], check=True, capture_output=True) + + # Push the branch + subprocess.run(["git", "push", "-u", "origin", branch_name, "--force"], check=True, capture_output=True) + branch_pushed = True + + # Create the PR (use owners read before component was removed) + body = create_removal_pr_body(component, owners) + pr_cmd = [ + "gh", + "pr", + "create", + "--repo", + repo, + "--title", + title, + "--body", + body, + "--label", + REMOVAL_LABEL, + ] + + # Add reviewers if we have owners + if owners: + pr_cmd.extend(["--reviewer", ",".join(owners[:MAX_ASSIGNEES])]) + + result = subprocess.run(pr_cmd, capture_output=True, text=True, check=True) + print(f"Created removal PR: {result.stdout.strip()}") + + return True + except subprocess.CalledProcessError as e: + print(f"Failed to create removal PR for {name}: {e}", file=sys.stderr) + if e.stderr: + print(f" stderr: {e.stderr}", file=sys.stderr) + # Clean up the remote branch if it was pushed but PR creation failed + if branch_pushed: + try: + subprocess.run( + ["git", "push", "origin", "--delete", branch_name], + check=True, + capture_output=True, + ) + print(f"Cleaned up orphaned remote branch: {branch_name}") + except subprocess.CalledProcessError: + print(f"::warning:: Failed to clean up remote branch: {branch_name}", file=sys.stderr) + return False + finally: + # Always restore original branch + if original_branch: + subprocess.run(["git", "checkout", original_branch], capture_output=True) + + +def ensure_labels_exist(repo: str, token: str | None, dry_run: bool) -> bool: + """Verify that required GitHub labels exist in the repo. + + Checks for ISSUE_LABEL and REMOVAL_LABEL. Returns True if both + exist (or if running in dry-run mode), False otherwise. + """ + required_labels = [ISSUE_LABEL, REMOVAL_LABEL] + headers = {"Accept": "application/vnd.github.v3+json"} + if token: + headers["Authorization"] = f"token {token}" + + missing = [] + for label in required_labels: + try: + resp = requests.get( + f"https://api.github.com/repos/{repo}/labels/{label}", + headers=headers, + timeout=30, + ) + if resp.status_code == 404: + missing.append(label) + else: + resp.raise_for_status() + except requests.exceptions.RequestException as e: + print(f"::error:: Failed to check for label '{label}': {e}", file=sys.stderr) + return False + + if missing: + msg = ", ".join(f"'{label}'" for label in missing) + if dry_run: + print(f"::warning:: Labels {msg} do not exist in {repo}. They would need to be created before a real run.") + return True + print(f"::error:: Required labels {msg} do not exist in {repo}.", file=sys.stderr) + print("::error:: Create the missing labels and re-run.", file=sys.stderr) + return False + + print(f"Preflight: all required labels exist ({', '.join(required_labels)})") + return True + + +def handle_stale_components(repo: str, token: str | None, dry_run: bool) -> bool: + """Handle stale components: issues for warnings, removal PRs for stale. + + Returns True if all operations succeeded, False if any failed. + """ + if not ensure_labels_exist(repo, token, dry_run): + return False + + repo_path = REPO_ROOT + results = scan_repo(repo_path) + # Include both warning (270-360 days) and stale (>360 days) components + fully_stale_components = results.get("stale", []) + components_needing_attention = results.get("warning", []) + fully_stale_components + + if not components_needing_attention and not fully_stale_components: + print("No components need attention.") + return True + + print( + f"Found {len(components_needing_attention)} components needing attention, including " + f"{len(fully_stale_components)} fully stale components that should be flagged for removal\n" + ) + + all_succeeded = True + issues_failed = 0 + prs_failed = 0 + + # Handle warning components: create removal warning Issues + if components_needing_attention: + print("=== Warning Components (creating issues) ===") + issues_created, issues_skipped = 0, 0 + for component in components_needing_attention: + if issue_exists(repo, component["name"], token): + print(f"Skipping {component['name']}: issue already exists") + issues_skipped += 1 + continue + if create_issue(repo, component, repo_path, token, dry_run): + issues_created += 1 + else: + issues_failed += 1 + all_succeeded = False + print(f"Issues: {issues_created} created, {issues_skipped} skipped, {issues_failed} failed\n") + + # Handle stale components: create stale component removal PRs + if fully_stale_components: + print("=== Stale Components (creating removal PRs) ===") + prs_created, prs_skipped = 0, 0 + for component in fully_stale_components: + if removal_pr_exists(repo, component["name"]): + print(f"Skipping {component['name']}: removal PR already exists") + prs_skipped += 1 + continue + if create_removal_pr(repo, component, repo_path, dry_run): + prs_created += 1 + else: + prs_failed += 1 + all_succeeded = False + print(f"Removal PRs: {prs_created} created, {prs_skipped} skipped, {prs_failed} failed") + + return all_succeeded + + +def main(): + """Handle stale components: create issues for warnings, and removal PRs if stale.""" + parser = argparse.ArgumentParser( + description="Handle stale components: create issues for warnings, and removal PRs if stale." + ) + parser.add_argument("--repo", required=True, help="GitHub repo (e.g., owner/repo)") + parser.add_argument("--token", help="GitHub token (or set GITHUB_TOKEN env var)") + parser.add_argument("--dry-run", action="store_true", help="Print without creating issues/PRs") + args = parser.parse_args() + + token = args.token or os.environ.get("GITHUB_TOKEN") + if not token: + if args.dry_run: + print( + "::warning:: Warning: No GitHub token provided. API requests will be subject to rate limiting.", + file=sys.stderr, + ) + print( + "::warning:: Use --token or set GITHUB_TOKEN environment variable for authenticated requests.", + file=sys.stderr, + ) + else: + print( + "::error:: Error: Required GitHub token not provided. " + "Will not be able to create staleness Issues and PRs.", + file=sys.stderr, + ) + print( + "::error:: Use --token or set GITHUB_TOKEN environment variable for authenticated requests.", + file=sys.stderr, + ) + print("::error:: Stopping...", file=sys.stderr) + sys.exit(1) + + success = handle_stale_components(args.repo, token, args.dry_run) + if not success: + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/.github/scripts/stale_component_handler/tests/__init__.py b/.github/scripts/stale_component_handler/tests/__init__.py new file mode 100644 index 00000000..d27675ab --- /dev/null +++ b/.github/scripts/stale_component_handler/tests/__init__.py @@ -0,0 +1 @@ +# Tests for stale_component_handler module diff --git a/.github/scripts/stale_component_handler/tests/test_stale_component_handler.py b/.github/scripts/stale_component_handler/tests/test_stale_component_handler.py new file mode 100644 index 00000000..ccd55bce --- /dev/null +++ b/.github/scripts/stale_component_handler/tests/test_stale_component_handler.py @@ -0,0 +1,325 @@ +#!/usr/bin/env python3 +"""Unit tests for stale_component_handler.py.""" + +from __future__ import annotations + +import json +import subprocess +from unittest.mock import MagicMock, patch + +import pytest +import requests +import yaml + +# Import the module object so we can use patch.object on it. +# String-based @patch() paths don't work because ".github" in the +# filesystem path confuses Python's resolve_name. +from .. import stale_component_handler as sch + +# Also import individual functions for direct calls +from ..stale_component_handler import ( + get_issue_title, + get_removal_pr_title, + sanitize_branch_name, +) + + +class TestSanitizeBranchName: + """Tests for sanitize_branch_name.""" + + @pytest.mark.parametrize( + "input_name, expected", + [ + ("my-component", "my-component"), + ("my component name", "my-component-name"), + ("comp~name^v2", "comp-name-v2"), + ("comp..name...test", "comp.name.test"), + (".comp-name.", "comp-name"), + ("comp---name", "comp-name"), + ("My-Component", "my-component"), + (" My Comp[lex]*Na?me ", "my-comp-lex-na-me"), + ("...", ""), + ], + ) + def test_sanitize(self, input_name, expected): + """Verify branch name sanitization for various inputs.""" + assert sanitize_branch_name(input_name) == expected + + +class TestTitleGenerators: + """Tests for issue and PR title generation.""" + + def test_get_issue_title(self): + """Verify issue title format.""" + assert get_issue_title("my-comp") == "Component `my-comp` needs verification" + + def test_get_removal_pr_title(self): + """Verify removal PR title format.""" + assert get_removal_pr_title("my-comp") == "chore: Remove stale component `my-comp`" + + +class TestGetOwners: + """Tests for get_owners.""" + + def test_returns_approvers(self, tmp_path): + """Return approvers list from a valid OWNERS file.""" + owners_data = {"approvers": ["alice", "bob"]} + (tmp_path / "OWNERS").write_text(yaml.dump(owners_data)) + assert sch.get_owners(tmp_path) == ["alice", "bob"] + + def test_missing_owners_file(self, tmp_path): + """Return empty list when OWNERS file does not exist.""" + assert sch.get_owners(tmp_path) == [] + + def test_empty_owners_file(self, tmp_path): + """Return empty list when OWNERS file is empty.""" + (tmp_path / "OWNERS").write_text("") + assert sch.get_owners(tmp_path) == [] + + def test_no_approvers_key(self, tmp_path): + """Return empty list when OWNERS file has no approvers key.""" + (tmp_path / "OWNERS").write_text(yaml.dump({"reviewers": ["alice"]})) + assert sch.get_owners(tmp_path) == [] + + def test_malformed_yaml(self, tmp_path): + """Return empty list when OWNERS file contains invalid YAML.""" + (tmp_path / "OWNERS").write_text("::not::valid::yaml[[[") + assert sch.get_owners(tmp_path) == [] + + +class TestIssueExists: + """Tests for issue_exists.""" + + def test_returns_true_when_matching_issue_found(self): + """Return True when an open issue with the expected title exists.""" + mock_resp = MagicMock() + mock_resp.json.return_value = [{"title": get_issue_title("my-comp")}] + mock_resp.raise_for_status = MagicMock() + + with patch.object(sch.requests, "get", return_value=mock_resp): + assert sch.issue_exists("owner/repo", "my-comp", "fake-token") is True + + def test_returns_false_when_no_match(self): + """Return False when no open issue matches the expected title.""" + mock_resp = MagicMock() + mock_resp.json.return_value = [{"title": "Some other issue"}] + mock_resp.raise_for_status = MagicMock() + + with patch.object(sch.requests, "get", return_value=mock_resp): + assert sch.issue_exists("owner/repo", "my-comp", "fake-token") is False + + def test_returns_true_on_api_error(self): + """On failure, assume issue exists to prevent duplicates.""" + with patch.object(sch.requests, "get", side_effect=requests.exceptions.ConnectionError("fail")): + assert sch.issue_exists("owner/repo", "my-comp", "fake-token") is True + + def test_no_auth_header_without_token(self): + """Omit Authorization header when no token is provided.""" + mock_resp = MagicMock() + mock_resp.json.return_value = [] + mock_resp.raise_for_status = MagicMock() + + with patch.object(sch.requests, "get", return_value=mock_resp) as mock_get: + sch.issue_exists("owner/repo", "my-comp", None) + _, kwargs = mock_get.call_args + assert "Authorization" not in kwargs["headers"] + + +class TestRemovalPrExists: + """Tests for removal_pr_exists.""" + + def test_returns_true_when_matching_pr_found(self): + """Return True when an open PR with the expected title exists.""" + mock_result = MagicMock( + stdout=json.dumps([{"title": get_removal_pr_title("my-comp")}]), + ) + with patch.object(sch.subprocess, "run", return_value=mock_result): + assert sch.removal_pr_exists("owner/repo", "my-comp") is True + + def test_returns_false_when_no_match(self): + """Return False when no open PR matches the expected title.""" + mock_result = MagicMock(stdout=json.dumps([])) + with patch.object(sch.subprocess, "run", return_value=mock_result): + assert sch.removal_pr_exists("owner/repo", "my-comp") is False + + def test_returns_true_on_cli_failure(self): + """On failure, assume PR exists to prevent duplicates.""" + with patch.object(sch.subprocess, "run", side_effect=subprocess.CalledProcessError(1, "gh")): + assert sch.removal_pr_exists("owner/repo", "my-comp") is True + + +class TestGetCurrentBranch: + """Tests for get_current_branch.""" + + def test_returns_branch_name(self): + """Return the branch name when on a named branch.""" + with patch.object(sch.subprocess, "run", return_value=MagicMock(stdout="main\n")): + assert sch.get_current_branch() == "main" + + def test_returns_none_for_detached_head(self): + """Return None when in detached HEAD state.""" + with patch.object(sch.subprocess, "run", return_value=MagicMock(stdout="HEAD\n")): + assert sch.get_current_branch() is None + + def test_returns_none_on_error(self): + """Return None when git command fails.""" + with patch.object(sch.subprocess, "run", side_effect=subprocess.CalledProcessError(1, "git")): + assert sch.get_current_branch() is None + + +class TestEnsureLabelsExist: + """Tests for ensure_labels_exist.""" + + def test_both_labels_exist(self): + """Pass when both required labels exist in the repo.""" + mock_resp = MagicMock(status_code=200, raise_for_status=MagicMock()) + + with patch.object(sch.requests, "get", return_value=mock_resp) as mock_get: + assert sch.ensure_labels_exist("owner/repo", "fake-token", dry_run=False) is True + assert mock_get.call_count == 2 + + def test_missing_label_fails(self): + """Fail when a required label is missing.""" + responses = [ + MagicMock(status_code=200, raise_for_status=MagicMock()), + MagicMock(status_code=404), + ] + with patch.object(sch.requests, "get", side_effect=responses): + assert sch.ensure_labels_exist("owner/repo", "fake-token", dry_run=False) is False + + def test_missing_label_warns_in_dry_run(self): + """Warn but pass when a label is missing in dry-run mode.""" + responses = [ + MagicMock(status_code=200, raise_for_status=MagicMock()), + MagicMock(status_code=404), + ] + with patch.object(sch.requests, "get", side_effect=responses): + assert sch.ensure_labels_exist("owner/repo", "fake-token", dry_run=True) is True + + def test_api_error_fails(self): + """Fail when the GitHub API request errors out.""" + with patch.object(sch.requests, "get", side_effect=requests.exceptions.ConnectionError("fail")): + assert sch.ensure_labels_exist("owner/repo", "fake-token", dry_run=False) is False + + +class TestCreateIssue: + """Tests for create_issue.""" + + COMPONENT = { + "name": "my-comp", + "path": "components/category/my-comp", + "last_verified": "2024-01-01", + "age_days": 300, + } + + def test_dry_run_returns_true(self, tmp_path): + """Return True without calling the API in dry-run mode.""" + comp_dir = tmp_path / "components" / "category" / "my-comp" + comp_dir.mkdir(parents=True) + assert sch.create_issue("owner/repo", self.COMPONENT, tmp_path, "fake-token", dry_run=True) is True + + def test_success(self, tmp_path): + """Return True when issue creation succeeds.""" + comp_dir = tmp_path / "components" / "category" / "my-comp" + comp_dir.mkdir(parents=True) + + mock_resp = MagicMock() + mock_resp.json.return_value = {"html_url": "https://github.com/owner/repo/issues/1"} + mock_resp.raise_for_status = MagicMock() + + with ( + patch.object(sch.requests, "post", return_value=mock_resp) as mock_post, + patch.object(sch, "create_issue_body", return_value="body text"), + ): + assert sch.create_issue("owner/repo", self.COMPONENT, tmp_path, "fake-token", dry_run=False) is True + mock_post.assert_called_once() + + def test_api_failure_returns_false(self, tmp_path): + """Return False when the API request fails.""" + comp_dir = tmp_path / "components" / "category" / "my-comp" + comp_dir.mkdir(parents=True) + + with ( + patch.object(sch.requests, "post", side_effect=requests.exceptions.HTTPError("403")), + patch.object(sch, "create_issue_body", return_value="body text"), + ): + assert sch.create_issue("owner/repo", self.COMPONENT, tmp_path, "fake-token", dry_run=False) is False + + +class TestCreateRemovalPrCleanup: + """Tests for create_removal_pr orphaned branch cleanup.""" + + COMPONENT = { + "name": "my-comp", + "path": "components/category/my-comp", + "last_verified": "2024-01-01", + "age_days": 400, + } + + def test_dry_run_returns_true(self, tmp_path): + """Return True without side effects in dry-run mode.""" + comp_dir = tmp_path / self.COMPONENT["path"] + comp_dir.mkdir(parents=True) + assert sch.create_removal_pr("owner/repo", self.COMPONENT, tmp_path, dry_run=True) is True + + def test_cleans_up_remote_branch_on_pr_failure(self, tmp_path): + """When push succeeds but gh pr create fails, the remote branch should be deleted.""" + comp_dir = tmp_path / self.COMPONENT["path"] + comp_dir.mkdir(parents=True) + + def side_effect(cmd, **kwargs): + cmd_list = cmd if isinstance(cmd, list) else [cmd] + # gh repo view (get default branch) + if "gh" in cmd_list and "repo" in cmd_list and "view" in cmd_list: + return MagicMock(stdout="main\n") + # gh pr create – simulate failure + if "gh" in cmd_list and "pr" in cmd_list and "create" in cmd_list: + raise subprocess.CalledProcessError(1, cmd, stderr="PR creation failed") + # git push --delete (cleanup) – should succeed + if "push" in cmd_list and "--delete" in cmd_list: + return MagicMock(returncode=0) + # All other git commands succeed + return MagicMock(returncode=0) + + with ( + patch.object(sch.subprocess, "run", side_effect=side_effect) as mock_run, + patch.object(sch, "get_current_branch", return_value="main"), + patch.object(sch, "get_owners", return_value=["alice"]), + ): + result = sch.create_removal_pr("owner/repo", self.COMPONENT, tmp_path, dry_run=False) + assert result is False + + # Verify cleanup was attempted + delete_calls = [c for c in mock_run.call_args_list if "--delete" in (c[0][0] if c[0] else [])] + assert len(delete_calls) == 1 + assert "remove-stale-my-comp" in delete_calls[0][0][0] + + def test_no_cleanup_when_push_fails(self, tmp_path): + """When push itself fails, no remote cleanup should be attempted.""" + comp_dir = tmp_path / self.COMPONENT["path"] + comp_dir.mkdir(parents=True) + + def side_effect(cmd, **kwargs): + cmd_list = cmd if isinstance(cmd, list) else [cmd] + if "gh" in cmd_list and "repo" in cmd_list and "view" in cmd_list: + return MagicMock(stdout="main\n") + # git push – simulate failure + if "push" in cmd_list and "-u" in cmd_list: + raise subprocess.CalledProcessError(1, cmd, stderr="push failed") + return MagicMock(returncode=0) + + with ( + patch.object(sch.subprocess, "run", side_effect=side_effect) as mock_run, + patch.object(sch, "get_current_branch", return_value="main"), + patch.object(sch, "get_owners", return_value=[]), + ): + result = sch.create_removal_pr("owner/repo", self.COMPONENT, tmp_path, dry_run=False) + assert result is False + + # No --delete call should have been made + delete_calls = [c for c in mock_run.call_args_list if "--delete" in (c[0][0] if c[0] else [])] + assert len(delete_calls) == 0 + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) diff --git a/.github/workflows/stale-component-check.yml b/.github/workflows/stale-component-check.yml new file mode 100644 index 00000000..0fc42cb1 --- /dev/null +++ b/.github/workflows/stale-component-check.yml @@ -0,0 +1,56 @@ +name: Stale Component Check + +on: + schedule: + # Run weekly on Monday at 9:00 AM UTC + - cron: '0 9 * * 1' + workflow_dispatch: + inputs: + dry_run: + description: 'Dry run (do not create issues/PRs)' + required: false + default: false + type: boolean + +jobs: + check-stale-components: + name: Handle stale components + runs-on: ubuntu-24.04 + permissions: + issues: write + contents: write + pull-requests: write + + steps: + - name: Checkout code + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Setup Python CI + uses: ./.github/actions/setup-python-ci + with: + python-version: 3.11 + + - name: Configure git + run: | + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + + - name: Handle stale components + if: github.event.inputs.dry_run != 'true' + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + PYTHONPATH: .github/scripts + run: | + uv run .github/scripts/stale_component_handler/stale_component_handler.py \ + --repo ${{ github.repository }} + + - name: Dry run (show what would be created) + if: github.event.inputs.dry_run == 'true' + env: + PYTHONPATH: .github/scripts + run: | + uv run .github/scripts/stale_component_handler/stale_component_handler.py \ + --repo ${{ github.repository }} \ + --dry-run