diff --git a/.codegen/.gitignore b/.codegen/.gitignore new file mode 100644 index 000000000..ab273ff10 --- /dev/null +++ b/.codegen/.gitignore @@ -0,0 +1,17 @@ +# Codegen +docs/ +examples/ +prompts/ +jupyter/ +.venv/ +codegen-system-prompt.txt + +# Python cache files +__pycache__/ +*.py[cod] +*$py.class + +# Keep config.toml and codemods +!config.toml +!codemods/ +!codemods/** diff --git a/.codegen/config.toml b/.codegen/config.toml new file mode 100644 index 000000000..1b9d56783 --- /dev/null +++ b/.codegen/config.toml @@ -0,0 +1,2 @@ +organization_name = "codegen-sh" +repo_name = "codegen-sdk" diff --git a/.gitignore b/.gitignore index ef6b42612..1f54b61cc 100644 --- a/.gitignore +++ b/.gitignore @@ -64,5 +64,4 @@ graph-sitter-types/out/** graph-sitter-types/typings/** coverage.json tests/integration/verified_codemods/codemod_data/repo_commits.json -.codegen/* .benchmarks/* diff --git a/src/codegen/cli/cli.py b/src/codegen/cli/cli.py index ce7e28b13..4cd767bd8 100644 --- a/src/codegen/cli/cli.py +++ b/src/codegen/cli/cli.py @@ -10,6 +10,7 @@ from codegen.cli.commands.logout.main import logout_command from codegen.cli.commands.notebook.main import notebook_command from codegen.cli.commands.profile.main import profile_command +from codegen.cli.commands.reset.main import reset_command from codegen.cli.commands.run.main import run_command from codegen.cli.commands.run_on_pr.main import run_on_pr_command from codegen.cli.commands.style_debug.main import style_debug_command @@ -37,6 +38,7 @@ def main(): main.add_command(style_debug_command) main.add_command(run_on_pr_command) main.add_command(notebook_command) +main.add_command(reset_command) if __name__ == "__main__": diff --git a/src/codegen/cli/commands/reset/main.py b/src/codegen/cli/commands/reset/main.py new file mode 100644 index 000000000..eb78e1527 --- /dev/null +++ b/src/codegen/cli/commands/reset/main.py @@ -0,0 +1,102 @@ +from pathlib import Path + +import click +from pygit2.enums import FileStatus, 
def backup_codegen_files(repo: Repository) -> dict[str, tuple[bytes | None, bool]]:
    """Backup changed .codegen files and track whether they were staged.

    Walks ``repo.status()`` and, for every path accepted by
    ``is_codegen_file``, records the current working-tree content so it can
    be written back after a hard reset.

    Args:
        repo: Repository whose status and working directory are inspected.

    Returns:
        Dict mapping filepath (as reported by ``repo.status()``) to a
        ``(content, was_staged)`` tuple. ``content`` is ``None`` only when the
        status marks the path as deleted AND no regular file is present in
        the working tree. ``was_staged`` is True when any INDEX_* change flag
        is set for the path.
    """
    # Flag masks are loop-invariant, so build them once.
    staged_flags = FileStatus.INDEX_MODIFIED | FileStatus.INDEX_NEW | FileStatus.INDEX_DELETED | FileStatus.INDEX_RENAMED
    deleted_flags = FileStatus.WT_DELETED | FileStatus.INDEX_DELETED
    changed_flags = FileStatus.WT_MODIFIED | FileStatus.WT_NEW | FileStatus.INDEX_MODIFIED | FileStatus.INDEX_NEW | FileStatus.INDEX_RENAMED
    workdir = Path(repo.workdir)

    codegen_changes: dict[str, tuple[bytes | None, bool]] = {}
    for filepath, status in repo.status().items():
        if not is_codegen_file(Path(filepath)):
            continue
        if not status & (deleted_flags | changed_flags):
            continue

        was_staged = bool(status & staged_flags)
        file_path = workdir / filepath

        if file_path.is_file():
            # The file is on disk, so keep its bytes even when the index says
            # "deleted" (e.g. INDEX_DELETED | WT_NEW after `git rm --cached`).
            # Recording it as deleted would make the restore step unlink a
            # file that must be preserved.
            codegen_changes[filepath] = (file_path.read_bytes(), was_staged)
        elif status & deleted_flags:
            # Deleted according to the status and absent from disk.
            codegen_changes[filepath] = (None, was_staged)
        # Otherwise there is nothing readable to back up for this path.

    return codegen_changes
@click.command(name="reset")
def reset_command() -> None:
    """Reset git repository while preserving all files in .codegen directory"""
    # NOTE: the docstring above is the command's --help text; keep it stable.
    repo = get_git_repo()
    if not repo:
        click.echo("Not a git repository", err=True)
        # Exit non-zero so shells and scripts can detect the failure.
        raise SystemExit(1)

    try:
        # Backup .codegen files and their staged status
        codegen_changes = backup_codegen_files(repo)

        # Reset everything (index and working tree) to HEAD
        repo.reset(repo.head.target, ResetMode.HARD)

        # Restore .codegen files and their staged status
        restore_codegen_files(repo, codegen_changes)

        # Remove untracked files except .codegen
        remove_untracked_files(repo)
    except Exception as e:
        # Top-level CLI boundary: report the error on stderr, then fail the
        # process instead of returning with exit status 0.
        click.echo(f"Error: {e}", err=True)
        raise SystemExit(1) from e
    else:
        click.echo(f"Reset complete. Repository has been restored to HEAD (preserving {CODEGEN_DIR}) and untracked files have been removed (except {CODEGEN_DIR})")
ResetTestCase: + """Test case for reset command""" + + name: str + changes: dict[str, str | None] + stage: bool + expected_content: dict[str, str | None] + expected_staged: set[str] + expected_modified: set[str] + expected_untracked: set[str] + rename_pairs: list[tuple[str, str]] + + +@pytest.fixture +def committed_state() -> dict[str, str]: + """Base state that will be committed""" + return { + "README.md": "Base README", + "src/hello.py": "def hello():\n print('Original hello')", + ".codegen/codemods/base.py": """ +def base(): + pass + +class MyClass: + pass +""", + } + + +@pytest.fixture +def committed_repo(initialized_repo: Path, committed_state: dict[str, str]) -> Path: + """Repo with committed_state committed""" + setup_repo_state(initialized_repo, committed_state) + subprocess.run(["git", "add", "."], cwd=initialized_repo, check=True) + subprocess.run(["git", "commit", "-m", "Initial commit"], cwd=initialized_repo, check=True) + return initialized_repo + + +def setup_repo_state(repo_path: Path, state: dict[str, str]): + """Helper to set up files in the repo""" + for filepath, content in state.items(): + file_path = repo_path / filepath + file_path.parent.mkdir(parents=True, exist_ok=True) + if content is None: + file_path.unlink() + else: + file_path.write_text(content) + + +def get_git_status(repo_path: Path) -> tuple[set[str], set[str], set[str]]: + """Returns sets of (staged_files, modified_files, untracked_files)""" + result = subprocess.run( + ["git", "status", "--porcelain"], + cwd=repo_path, + check=True, + capture_output=True, + text=True, + ) + + staged_files = set() + modified_files = set() + untracked_files = set() + + for line in result.stdout.splitlines(): + if not line: + continue + status = line[:2] + file_path = line[3:] + + # Staged changes + if status[0] in {"M", "A", "D", "R"}: + staged_files.add(file_path) + # Unstaged changes + if status[1] in {"M", "D"}: + modified_files.add(file_path) + # Untracked files + if status == "??": + 
def create_test_case(
    name: str,
    changes: dict[str, str | None],
    stage: bool,
    committed_state: dict[str, str],
    expected_content: dict[str, str | None] | None = None,
    expected_staged: set[str] | None = None,
    expected_modified: set[str] | None = None,
    expected_untracked: set[str] | None = None,
    rename_pairs: list[tuple[str, str]] | None = None,
) -> ResetTestCase:
    """Helper to create test cases with defaults.

    Args:
        name: Human-readable name of the scenario.
        changes: Mapping of path to new content; ``None`` marks a deletion.
        stage: Whether the changes are staged before the reset runs.
        committed_state: Mapping of path to the content that was committed.
        expected_content: Expected path-to-content mapping after the reset.
            When omitted it is derived from ``changes``: paths starting with
            ``.codegen`` keep their changed content, every other path maps to
            its committed content (``None`` if it was never committed).
        expected_staged: Paths expected to be staged; defaults to empty.
        expected_modified: Paths expected to be modified; defaults to empty.
        expected_untracked: Paths expected to be untracked; defaults to empty.
        rename_pairs: ``(old_path, new_path)`` pairs; defaults to empty.

    Returns:
        A populated ``ResetTestCase``.
    """
    if expected_content is None:
        expected_content = {
            path: content if path.startswith(".codegen") else committed_state.get(path)
            for path, content in changes.items()
        }

    return ResetTestCase(
        name=name,
        changes=changes,
        stage=stage,
        expected_content=expected_content,
        # Copy the collections so each test case owns its containers: a
        # shared default list, or a caller's set that is later modified in
        # place, must not leak state between cases.
        expected_staged=set(expected_staged or ()),
        expected_modified=set(expected_modified or ()),
        expected_untracked=set(expected_untracked or ()),
        rename_pairs=list(rename_pairs or ()),
    )
".codegen/codemods/new_staged.py"}, + ), + id="staged_changes", + ), + pytest.param( + lambda committed_state: create_test_case( + name="staged deletions", + changes={ + "README.md": None, + "src/hello.py": None, + ".codegen/codemods/base.py": None, + }, + stage=True, + committed_state=committed_state, + expected_staged={".codegen/codemods/base.py"}, + ), + id="staged_deletions", + ), + pytest.param( + lambda committed_state: create_test_case( + name="staged renames", + changes={ + ".codegen/codemods/base.py": None, # Delete original + ".codegen/codemods/renamed_base.py": committed_state[".codegen/codemods/base.py"], # Add with same content + }, + stage=True, + committed_state=committed_state, + expected_staged={".codegen/codemods/base.py", ".codegen/codemods/renamed_base.py"}, + rename_pairs=[(".codegen/codemods/base.py", ".codegen/codemods/renamed_base.py")], + ), + id="staged_renames", + ), + pytest.param( + lambda committed_state: create_test_case( + name="unstaged renames", + changes={ + ".codegen/codemods/base.py": None, # Delete original + ".codegen/codemods/renamed_base.py": committed_state[".codegen/codemods/base.py"], # Add with same content + }, + stage=False, + committed_state=committed_state, + expected_modified={".codegen/codemods/base.py"}, + expected_untracked={".codegen/codemods/renamed_base.py"}, + rename_pairs=[(".codegen/codemods/base.py", ".codegen/codemods/renamed_base.py")], + ), + id="unstaged_renames", + ), + pytest.param( + lambda committed_state: create_test_case( + name="staged rename with modifications", + changes={ + ".codegen/codemods/base.py": None, # Delete original + ".codegen/codemods/renamed_base.py": committed_state[".codegen/codemods/base.py"] + "\n# Modified", # Add with modified content + }, + stage=True, + committed_state=committed_state, + expected_staged={".codegen/codemods/base.py", ".codegen/codemods/renamed_base.py"}, + rename_pairs=[(".codegen/codemods/base.py", ".codegen/codemods/renamed_base.py")], + ), + 
id="staged_rename_with_modifications", + ), + ], +) +def test_reset(committed_repo: Path, committed_state: dict[str, str], test_case: ResetTestCase, runner: CliRunner): + """Test reset command with various scenarios""" + # Get test case from factory function if needed + if callable(test_case): + test_case = test_case(committed_state) + + # Set up test state + if test_case.changes: + changes = {k: v for k, v in test_case.changes.items() if v is not None} + if changes: + setup_repo_state(committed_repo, changes) + + # Handle deletions + for path, content in test_case.changes.items(): + if content is None: + (committed_repo / path).unlink() + if test_case.stage: + subprocess.run(["git", "add", "-A"], cwd=committed_repo, check=True) + + # Run reset + result = runner.invoke(reset_command, catch_exceptions=False) + print(result.output) + + # Verify state + verify_repo_state(committed_repo, test_case.expected_content) + verify_git_state( + committed_repo, + expected_staged=test_case.expected_staged, + expected_modified=test_case.expected_modified, + expected_untracked=test_case.expected_untracked, + rename_pairs=test_case.rename_pairs, + ) + + +def test_reset_with_mixed_states(committed_repo: Path, committed_state: dict[str, str], runner: CliRunner): + """Test reset with a mix of staged, unstaged, and untracked changes""" + # 1. Staged modifications + staged_changes = { + "README.md": "Staged README", + ".codegen/codemods/base.py": "def base():\n print('Staged base')", + } + setup_repo_state(committed_repo, staged_changes) + subprocess.run(["git", "add", "."], cwd=committed_repo, check=True) + + # 2. Unstaged modifications + unstaged_changes = { + "README.md": "Unstaged README", + "src/hello.py": "def hello():\n print('Unstaged hello')", + ".codegen/codemods/base.py": "def base():\n print('Unstaged base')", + } + setup_repo_state(committed_repo, unstaged_changes) + + # 3. 
def test_reset_with_mixed_renames(committed_repo: Path, committed_state: dict[str, str], runner: CliRunner):
    """Test reset with a mix of staged and unstaged renames"""
    # 1. Staged rename
    staged_changes = {
        ".codegen/codemods/base.py": None,
        ".codegen/codemods/staged_rename.py": committed_state[".codegen/codemods/base.py"],
    }
    setup_repo_state(committed_repo, staged_changes)
    subprocess.run(["git", "add", "."], cwd=committed_repo, check=True)

    # 2. Unstaged rename
    unstaged_changes = {
        "README.md": None,
        "README.mdx": committed_state["README.md"],
    }
    setup_repo_state(committed_repo, unstaged_changes)
    # Don't stage these changes

    # Run reset. Let unexpected exceptions propagate and check the exit code,
    # so a failing command is reported directly rather than surfacing later
    # as a confusing file-state mismatch.
    result = runner.invoke(reset_command, catch_exceptions=False)
    assert result.exit_code == 0, result.output

    # Verify state
    verify_repo_state(
        committed_repo,
        {
            ".codegen/codemods/base.py": None,
            ".codegen/codemods/staged_rename.py": committed_state[".codegen/codemods/base.py"],
            "README.md": committed_state["README.md"],
            "README.mdx": None,
        },
    )

    # Verify git state
    verify_git_state(
        committed_repo,
        expected_staged={".codegen/codemods/base.py", ".codegen/codemods/staged_rename.py"},
        expected_modified=set(),
        expected_untracked=set(),
        rename_pairs=[(".codegen/codemods/base.py", ".codegen/codemods/staged_rename.py")],
    )