Skip to content

Commit 8cc0c33

Browse files
committed
Updated api, better case handling
1 parent 61930e9 commit 8cc0c33

File tree

4 files changed

+55
-97
lines changed

4 files changed

+55
-97
lines changed

docs/building-with-codegen/parsing-codebases.mdx

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,20 +34,20 @@ codebase = Codebase("./")
3434

3535
## Remote Repositories
3636

37-
To fetch and parse a repository directly from GitHub, use the `fetch_codebase` function.
37+
To fetch and parse a repository directly from GitHub, use the `from_repo` function.
3838

3939
```python
4040
import codegen
4141

4242
# Fetch and parse a repository (defaults to /tmp/codegen/{repo_name})
43-
codebase = codegen.fetch_codebase('fastapi/fastapi')
43+
codebase = codegen.from_repo('fastapi/fastapi')
4444

4545
# Customize temp directory, clone depth, or specific commit
46-
codebase = codegen.fetch_codebase(
46+
codebase = codegen.from_repo(
4747
'fastapi/fastapi',
4848
tmp_dir='/custom/temp/dir', # Optional: custom temp directory
49+
commit='786a8ada7ed0c7f9d8b04d49f24596865e4b7901', # Optional: specific commit
4950
shallow=False, # Optional: full clone instead of shallow
50-
commit_hash='fe513719ea98abade167d8a89e92f600d9d8f0e5' # Optional: specific commit
5151
)
5252
```
5353

src/codegen/git/repo_operator/local_repo_operator.py

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import os
22
from functools import cached_property
3-
from typing import Self, override
3+
from typing import Self, override, Optional
44

55
from codeowners import CodeOwners as CodeOwnersParser
66
from git import Remote
@@ -82,6 +82,48 @@ def create_from_commit(cls, repo_path: str, default_branch: str, commit: str, ur
8282
op.checkout_commit(commit)
8383
return op
8484

85+
@classmethod
86+
def create_from_repo(cls, repo_path: str, url: str) -> Self:
87+
"""Create a fresh clone of a repository or use existing one if up to date.
88+
89+
Args:
90+
repo_path (str): Path where the repo should be cloned
91+
url (str): Git URL of the repository
92+
"""
93+
# Check if repo already exists
94+
if os.path.exists(repo_path):
95+
try:
96+
# Try to initialize git repo from existing path
97+
git_cli = GitCLI(repo_path)
98+
# Check if it has our remote URL
99+
if any(remote.url == url for remote in git_cli.remotes):
100+
# Fetch to check for updates
101+
git_cli.remotes.origin.fetch()
102+
# Get current and remote HEADs
103+
local_head = git_cli.head.commit
104+
remote_head = git_cli.remotes.origin.refs[git_cli.active_branch.name].commit
105+
# If up to date, use existing repo
106+
if local_head.hexsha == remote_head.hexsha:
107+
default_branch = git_cli.active_branch.name
108+
return cls(repo_config=BaseRepoConfig(), repo_path=repo_path, default_branch=default_branch, bot_commit=False)
109+
except Exception:
110+
# If any git operations fail, fallback to fresh clone
111+
pass
112+
113+
# If we get here, repo exists but is not up to date or valid
114+
# Remove the existing directory to do a fresh clone
115+
import shutil
116+
shutil.rmtree(repo_path)
117+
118+
# Do a fresh clone with depth=1 to get latest commit
119+
GitCLI.clone_from(url=url, to_path=repo_path, depth=1)
120+
121+
# Initialize with the cloned repo
122+
git_cli = GitCLI(repo_path)
123+
default_branch = git_cli.active_branch.name
124+
125+
return cls(repo_config=BaseRepoConfig(), repo_path=repo_path, default_branch=default_branch, bot_commit=False)
126+
85127
####################################################################################################################
86128
# PROPERTIES
87129
####################################################################################################################

src/codegen/sdk/core/codebase.py

Lines changed: 8 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1086,29 +1086,16 @@ def set_session_options(self, **kwargs: Unpack[SessionOptions]) -> None:
10861086
self.G.transaction_manager.reset_stopwatch(self.G.session_options.max_seconds)
10871087

10881088
@classmethod
1089-
def fetch_codebase(cls, repo_name: str, *, tmp_dir: str | None = None, shallow: bool = True, commit_hash: str | None = None) -> "Codebase":
1089+
def from_repo(cls, repo_name: str, *, tmp_dir: str | None = None, commit: str | None = None, shallow: bool = True) -> "Codebase":
10901090
"""Fetches a codebase from GitHub and returns a Codebase instance.
10911091
10921092
Args:
10931093
repo_name (str): The name of the repository in format "owner/repo"
10941094
tmp_dir (Optional[str]): The directory to clone the repo into. Defaults to /tmp/codegen
1095+
commit (Optional[str]): The specific commit hash to clone. Defaults to HEAD
10951096
shallow (bool): Whether to do a shallow clone. Defaults to True
1096-
commit_hash (Optional[str]): The specific commit hash to clone. Defaults to HEAD
10971097
Returns:
10981098
Codebase: A Codebase instance initialized with the cloned repository
1099-
Example:
1100-
```python
1101-
import codegen.sdk as sdk
1102-
import logging
1103-
# Enable logging to see progress
1104-
logging.basicConfig(level=logging.INFO)
1105-
# Clone a repository to default location (/tmp/codegen)
1106-
codebase = sdk.fetch_codebase('facebook/react')
1107-
# Or specify a custom directory
1108-
codebase = sdk.fetch_codebase('facebook/react', tmp_dir='~/my_repos')
1109-
# Or clone a specific commit
1110-
codebase = sdk.fetch_codebase('facebook/react', commit_hash='abc123')
1111-
```
11121099
"""
11131100
logger.info(f"Fetching codebase for {repo_name}")
11141101

@@ -1131,10 +1118,14 @@ def fetch_codebase(cls, repo_name: str, *, tmp_dir: str | None = None, shallow:
11311118
try:
11321119
# Use LocalRepoOperator to fetch the repository
11331120
logger.info("Cloning repository...")
1134-
repo_operator = LocalRepoOperator.create_from_commit(
1121+
if commit is None:
1122+
repo_operator = LocalRepoOperator.create_from_repo(repo_path=repo_path, url=repo_url)
1123+
else:
1124+
# A specific commit was requested: clone the repo and check out that commit
1125+
repo_operator = LocalRepoOperator.create_from_commit(
11351126
repo_path=repo_path,
11361127
default_branch="main", # We'll get the actual default branch after clone
1137-
commit=commit_hash or "HEAD",
1128+
commit=commit,
11381129
url=repo_url,
11391130
)
11401131
logger.info("Clone completed successfully")

src/codegen/sdk/fetch_codebase.py

Lines changed: 0 additions & 75 deletions
This file was deleted.

0 commit comments

Comments
 (0)