Skip to content

Commit 3f06e59

Browse files
committed
Gets Codebase.fetch_codebase() working
1 parent cfaca9f commit 3f06e59

File tree

1 file changed

+72
-0
lines changed

1 file changed

+72
-0
lines changed

src/codegen/sdk/core/codebase.py

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1086,6 +1086,78 @@ def set_session_options(self, **kwargs: Unpack[SessionOptions]) -> None:
10861086
self.G.transaction_manager.reset_stopwatch(self.G.session_options.max_seconds)
10871087

10881088

1089+
@classmethod
1090+
def fetch_codebase(
1091+
cls,
1092+
repo_name: str,
1093+
*,
1094+
tmp_dir: str | None = None,
1095+
shallow: bool = True,
1096+
commit_hash: str | None = None
1097+
) -> "Codebase":
1098+
"""Fetches a codebase from GitHub and returns a Codebase instance.
1099+
1100+
Args:
1101+
repo_name (str): The name of the repository in format "owner/repo"
1102+
tmp_dir (Optional[str]): The directory to clone the repo into. Defaults to /tmp/codegen
1103+
shallow (bool): Whether to do a shallow clone. Defaults to True
1104+
commit_hash (Optional[str]): The specific commit hash to clone. Defaults to HEAD
1105+
Returns:
1106+
Codebase: A Codebase instance initialized with the cloned repository
1107+
Example:
1108+
```python
1109+
import codegen.sdk as sdk
1110+
import logging
1111+
# Enable logging to see progress
1112+
logging.basicConfig(level=logging.INFO)
1113+
# Clone a repository to default location (/tmp/codegen)
1114+
codebase = sdk.fetch_codebase('facebook/react')
1115+
# Or specify a custom directory
1116+
codebase = sdk.fetch_codebase('facebook/react', tmp_dir='~/my_repos')
1117+
# Or clone a specific commit
1118+
codebase = sdk.fetch_codebase('facebook/react', commit_hash='abc123')
1119+
```
1120+
"""
1121+
logger.info(f"Fetching codebase for {repo_name}")
1122+
1123+
# Parse repo name
1124+
if "/" not in repo_name:
1125+
raise ValueError("repo_name must be in format 'owner/repo'")
1126+
owner, repo = repo_name.split("/")
1127+
1128+
# Setup temp directory
1129+
if tmp_dir is None:
1130+
tmp_dir = "/tmp/codegen"
1131+
os.makedirs(tmp_dir, exist_ok=True)
1132+
logger.info(f"Using directory: {tmp_dir}")
1133+
1134+
# Setup repo path and URL
1135+
repo_path = os.path.join(tmp_dir, repo)
1136+
repo_url = f"https://github.com/{repo_name}.git"
1137+
logger.info(f"Will clone {repo_url} to {repo_path}")
1138+
1139+
try:
1140+
# Use LocalRepoOperator to fetch the repository
1141+
logger.info("Cloning repository...")
1142+
repo_operator = LocalRepoOperator.create_from_commit(
1143+
repo_path=repo_path,
1144+
default_branch="main", # We'll get the actual default branch after clone
1145+
commit=commit_hash or "HEAD",
1146+
url=repo_url,
1147+
)
1148+
logger.info("Clone completed successfully")
1149+
1150+
# Initialize and return codebase with proper context
1151+
logger.info("Initializing Codebase...")
1152+
project = ProjectConfig(repo_operator=repo_operator,
1153+
programming_language=determine_project_language(repo_path))
1154+
codebase = Codebase(projects=[project], config=DefaultConfig)
1155+
logger.info("Codebase initialization complete")
1156+
return codebase
1157+
except Exception as e:
1158+
logger.error(f"Failed to initialize codebase: {e}")
1159+
raise
1160+
10891161
# The last 2 lines of code are added to the runner. See codegen-backend/cli/generate/utils.py
10901162
# Type Aliases
10911163
# Alias for Codebase subscripted with the eleven type arguments listed below, in order.
# NOTE(review): these look like the language-agnostic base node types; confirm against
# the Codebase class's type parameters, which are declared outside this view.
CodebaseType = Codebase[SourceFile, Directory, Symbol, Class, Function, Import, Assignment, Interface, TypeAlias, Parameter, CodeBlock]

0 commit comments

Comments
 (0)