Skip to content

Commit 04da3fa

Browse files
author
codegen-bot
committed
New Codebase Init Flow
1 parent e99b63a commit 04da3fa

File tree

3 files changed

+46
-19
lines changed

3 files changed

+46
-19
lines changed

src/codegen/git/repo_operator/local_repo_operator.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,14 +32,15 @@ class LocalRepoOperator(RepoOperator):
3232

3333
def __init__(
    self,
    repo_path: str,  # full path to the repo
    repo_config: BaseRepoConfig | None = None,
    bot_commit: bool = True,
) -> None:
    """Set up an operator for the local repository at ``repo_path``.

    The directory is created when it does not exist yet and ``GitCLI.init``
    is run on it before the base ``RepoOperator`` initialiser is called.

    Args:
        repo_path: Full path to the repo.
        repo_config: Repo configuration. A default ``BaseRepoConfig()`` is
            used when this is omitted.
        bot_commit: Forwarded unchanged to ``RepoOperator.__init__``.
    """
    self._repo_path = repo_path
    # Normalise before taking the basename: os.path.basename("/a/repo/") is
    # "", which would leave the repo without a name when the caller passes a
    # path with a trailing separator.
    self._repo_name = os.path.basename(os.path.normpath(repo_path))
    os.makedirs(self.repo_path, exist_ok=True)
    GitCLI.init(self.repo_path)
    repo_config = repo_config or BaseRepoConfig()
    super().__init__(repo_config, self.repo_path, bot_commit)
4445

4546
####################################################################################################################

src/codegen/sdk/codebase/config.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,13 @@
1+
import os
2+
from typing import Self
3+
14
from pydantic import BaseModel, ConfigDict, Field
25

6+
from codegen.git.repo_operator.local_repo_operator import LocalRepoOperator
37
from codegen.git.repo_operator.repo_operator import RepoOperator
48
from codegen.sdk.enums import ProgrammingLanguage
59
from codegen.sdk.secrets import Secrets
10+
from codegen.sdk.utils import determine_project_language, split_git_path
611

712
HARD_MAX_AI_LIMIT = 500 # Global limit for AI requests
813

@@ -55,6 +60,28 @@ class ProjectConfig(BaseModel):
5560
subdirectories: list[str] | None = None
5661
programming_language: ProgrammingLanguage = ProgrammingLanguage.PYTHON
5762

63+
@classmethod
def from_path(cls, path: str, programming_language: ProgrammingLanguage | None = None) -> Self:
    """Build a project config for the code located at ``path``.

    ``path`` is made absolute and then split by ``split_git_path`` into the
    git root and the base path below it. A ``LocalRepoOperator`` is created
    for the git root.

    Args:
        path: Location of the project; may be relative.
        programming_language: Language to use for the project. When not
            given, the result of ``determine_project_language`` on the
            absolute path is used instead.

    Returns:
        A new instance of ``cls`` whose ``subdirectories`` is ``[base_path]``
        when the base path is non-empty and ``None`` otherwise.
    """
    absolute_path = os.path.abspath(path)
    git_root, base_path = split_git_path(absolute_path)

    # The operator is built before the language lookup, matching the order in
    # which the constructor arguments are evaluated.
    operator = LocalRepoOperator(repo_path=git_root)
    language = programming_language or determine_project_language(absolute_path)

    if base_path:
        subdirectories = [base_path]
    else:
        subdirectories = None

    return cls(
        repo_operator=operator,
        programming_language=language,
        base_path=base_path,
        subdirectories=subdirectories,
    )
75+
76+
@classmethod
def from_repo_operator(cls, repo_operator: RepoOperator, programming_language: ProgrammingLanguage | None = None, base_path: str | None = None) -> Self:
    """Build a project config around an already constructed repo operator.

    Args:
        repo_operator: Operator for the repository the project lives in.
        programming_language: Language to use for the project. When not
            given, the result of ``determine_project_language`` on
            ``repo_operator.repo_path`` is used instead.
        base_path: Optional base path of the project.

    Returns:
        A new instance of ``cls`` whose ``subdirectories`` is ``[base_path]``
        when ``base_path`` is non-empty and ``None`` otherwise.
    """
    language = programming_language or determine_project_language(repo_operator.repo_path)

    if base_path:
        subdirectories = [base_path]
    else:
        subdirectories = None

    return cls(
        repo_operator=repo_operator,
        programming_language=language,
        base_path=base_path,
        subdirectories=subdirectories,
    )
84+
5885

5986
class CodebaseConfig(BaseModel):
6087
"""Configuration for a Codebase. There can be 1 -> many codebases in a single repo

src/codegen/sdk/core/codebase.py

Lines changed: 17 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323
from codegen.git.repo_operator.remote_repo_operator import RemoteRepoOperator
2424
from codegen.git.repo_operator.repo_operator import RepoOperator
2525
from codegen.git.schemas.enums import CheckoutResult
26-
from codegen.git.schemas.repo_config import BaseRepoConfig
2726
from codegen.sdk._proxy import proxy_property
2827
from codegen.sdk.ai.helpers import AbstractAIHelper, MultiProviderAIHelper
2928
from codegen.sdk.codebase.codebase_ai import generate_system_prompt, generate_tools
@@ -74,7 +73,6 @@
7473
from codegen.sdk.typescript.statements.import_statement import TSImportStatement
7574
from codegen.sdk.typescript.symbol import TSSymbol
7675
from codegen.sdk.typescript.type_alias import TSTypeAlias
77-
from codegen.sdk.utils import determine_project_language, split_git_path
7876
from codegen.shared.decorators.docs import apidoc, noapidoc, py_noapidoc
7977
from codegen.shared.exceptions.control_flow import MaxAIRequestsError
8078
from codegen.shared.performance.stopwatch_utils import stopwatch
@@ -119,7 +117,8 @@ def __init__(
119117
self,
120118
repo_path: None = None,
121119
*,
122-
projects: list[ProjectConfig],
120+
programming_language: None = None,
121+
projects: list[ProjectConfig] | ProjectConfig,
123122
config: CodebaseConfig = DefaultConfig,
124123
) -> None: ...
125124

@@ -128,6 +127,7 @@ def __init__(
128127
self,
129128
repo_path: str,
130129
*,
130+
programming_language: ProgrammingLanguage,
131131
projects: None = None,
132132
config: CodebaseConfig = DefaultConfig,
133133
) -> None: ...
@@ -136,7 +136,8 @@ def __init__(
136136
self,
137137
repo_path: str | None = None,
138138
*,
139-
projects: list[ProjectConfig] | None = None,
139+
programming_language: ProgrammingLanguage | None = None,
140+
projects: list[ProjectConfig] | ProjectConfig | None = None,
140141
config: CodebaseConfig = DefaultConfig,
141142
) -> None:
142143
# Sanity check inputs
@@ -146,19 +147,16 @@ def __init__(
146147
if repo_path is None and projects is None:
147148
raise ValueError("Must specify either repo_path or projects")
148149

150+
if projects is not None and programming_language is not None:
151+
raise ValueError("Cannot specify both projects and programming_language. Use ProjectConfig.from_path() to create projects with a custom programming_language.")
152+
153+
# If projects is a single ProjectConfig, convert it to a list
154+
if isinstance(projects, ProjectConfig):
155+
projects = [projects]
156+
149157
# Initialize project with repo_path if projects is None
150158
if repo_path is not None:
151-
# Split repo_path into (git_root, base_path)
152-
repo_path = os.path.abspath(repo_path)
153-
git_root, base_path = split_git_path(repo_path)
154-
# Create repo_config
155-
repo_config = BaseRepoConfig()
156-
# Create main project
157-
main_project = ProjectConfig(
158-
repo_operator=LocalRepoOperator(repo_config=repo_config, repo_path=git_root),
159-
programming_language=determine_project_language(repo_path),
160-
base_path=base_path,
161-
)
159+
main_project = ProjectConfig.from_path(repo_path, programming_language=programming_language)
162160
projects = [main_project]
163161
else:
164162
main_project = projects[0]
@@ -1125,14 +1123,16 @@ def set_session_options(self, **kwargs: Unpack[SessionOptions]) -> None:
11251123
self.G.transaction_manager.reset_stopwatch(self.G.session_options.max_seconds)
11261124

11271125
@classmethod
1128-
def from_repo(cls, repo_name: str, *, tmp_dir: str | None = None, commit: str | None = None, shallow: bool = True) -> "Codebase":
1126+
def from_repo(cls, repo_name: str, *, tmp_dir: str | None = None, commit: str | None = None, shallow: bool = True, programming_language: ProgrammingLanguage | None = None) -> "Codebase":
11291127
"""Fetches a codebase from GitHub and returns a Codebase instance.
11301128
11311129
Args:
11321130
repo_name (str): The name of the repository in format "owner/repo"
11331131
tmp_dir (Optional[str]): The directory to clone the repo into. Defaults to /tmp/codegen
11341132
commit (Optional[str]): The specific commit hash to clone. Defaults to HEAD
11351133
shallow (bool): Whether to do a shallow clone. Defaults to True
1134+
programming_language (ProgrammingLanguage | None): The programming language of the repo. Defaults to None.
1135+
11361136
Returns:
11371137
Codebase: A Codebase instance initialized with the cloned repository
11381138
"""
@@ -1163,15 +1163,14 @@ def from_repo(cls, repo_name: str, *, tmp_dir: str | None = None, commit: str |
11631163
# Ensure the operator can handle remote operations
11641164
repo_operator = LocalRepoOperator.create_from_commit(
11651165
repo_path=repo_path,
1166-
default_branch="main", # We'll get the actual default branch after clone
11671166
commit=commit,
11681167
url=repo_url,
11691168
)
11701169
logger.info("Clone completed successfully")
11711170

11721171
# Initialize and return codebase with proper context
11731172
logger.info("Initializing Codebase...")
1174-
project = ProjectConfig(repo_operator=repo_operator, programming_language=determine_project_language(repo_path))
1173+
project = ProjectConfig.from_repo_operator(repo_operator=repo_operator, programming_language=programming_language)
11751174
codebase = Codebase(projects=[project], config=DefaultConfig)
11761175
logger.info("Codebase initialization complete")
11771176
return codebase

0 commit comments

Comments
 (0)