@@ -1086,6 +1086,78 @@ def set_session_options(self, **kwargs: Unpack[SessionOptions]) -> None:
10861086 self .G .transaction_manager .reset_stopwatch (self .G .session_options .max_seconds )
10871087
10881088
@classmethod
def fetch_codebase(
    cls,
    repo_name: str,
    *,
    tmp_dir: str | None = None,
    shallow: bool = True,
    commit_hash: str | None = None,
) -> "Codebase":
    """Clone a GitHub repository and return a Codebase built from it.

    Progress is reported through the module logger at INFO level.

    Args:
        repo_name: The repository in "owner/repo" format.
        tmp_dir: Directory to clone the repo into. A leading ``~`` is
            expanded to the user's home directory, and the directory is
            created if it does not exist. Defaults to /tmp/codegen.
        shallow: Whether to do a shallow clone. Defaults to True.
            NOTE(review): this flag is accepted but not forwarded to the
            clone call below, so it currently has no effect -- confirm
            whether LocalRepoOperator exposes a matching option.
        commit_hash: The specific commit to check out. Defaults to "HEAD".

    Returns:
        A codebase instance (of the class this method is called on)
        initialized with the cloned repository.

    Raises:
        ValueError: If ``repo_name`` is not exactly "owner/repo" with a
            non-empty owner and a non-empty repo.
        Exception: Any error raised while cloning or initializing is
            logged and re-raised unchanged.

    Example:
        ```python
        import logging
        # Enable logging to see progress
        logging.basicConfig(level=logging.INFO)
        # Clone a repository to the default location (/tmp/codegen)
        codebase = Codebase.fetch_codebase('facebook/react')
        # Or specify a custom directory
        codebase = Codebase.fetch_codebase('facebook/react', tmp_dir='~/my_repos')
        # Or clone a specific commit
        codebase = Codebase.fetch_codebase('facebook/react', commit_hash='abc123')
        ```
    """
    logger.info(f"Fetching codebase for {repo_name}")

    # Parse repo name: require exactly one "/" with text on both sides, so
    # inputs such as "repo", "/repo", "owner/" or "a/b/c" fail with a clear
    # message instead of an unpacking error or a clone into tmp_dir itself.
    owner, separator, repo = repo_name.partition("/")
    if not separator or not owner or not repo or "/" in repo:
        raise ValueError("repo_name must be in format 'owner/repo'")

    # Setup temp directory
    if tmp_dir is None:
        tmp_dir = "/tmp/codegen"
    else:
        # Without this, "~/my_repos" would create a literal "~" directory.
        tmp_dir = os.path.expanduser(tmp_dir)
    os.makedirs(tmp_dir, exist_ok=True)
    logger.info(f"Using directory: {tmp_dir}")

    # Setup repo path and URL
    repo_path = os.path.join(tmp_dir, repo)
    repo_url = f"https://github.com/{repo_name}.git"
    logger.info(f"Will clone {repo_url} to {repo_path}")

    try:
        # Use LocalRepoOperator to fetch the repository
        logger.info("Cloning repository...")
        repo_operator = LocalRepoOperator.create_from_commit(
            repo_path=repo_path,
            # Placeholder value. NOTE(review): nothing in this method
            # updates it after the clone -- confirm the operator resolves
            # the repository's actual default branch.
            default_branch="main",
            commit=commit_hash or "HEAD",
            url=repo_url,
        )
        logger.info("Clone completed successfully")

        # Initialize and return codebase with proper context
        logger.info("Initializing Codebase...")
        project = ProjectConfig(
            repo_operator=repo_operator,
            programming_language=determine_project_language(repo_path),
        )
        # Build via cls so that calling this on a subclass yields that subclass.
        codebase = cls(projects=[project], config=DefaultConfig)
        logger.info("Codebase initialization complete")
        return codebase
    except Exception as e:
        logger.error(f"Failed to initialize codebase: {e}")
        raise
1160+
10891161# The last 2 lines of code are added to the runner. See codegen-backend/cli/generate/utils.py
10901162# Type Aliases
# Alias for Codebase with its generic parameters bound to the concrete node
# types listed below (SourceFile, Directory, Symbol, ...).
# NOTE(review): presumably the Python-language specialization -- confirm.
10911163CodebaseType = Codebase [SourceFile , Directory , Symbol , Class , Function , Import , Assignment , Interface , TypeAlias , Parameter , CodeBlock ]
0 commit comments