Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/codegen/git/repo_operator/local_repo_operator.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,15 @@ class LocalRepoOperator(RepoOperator):

def __init__(
    self,
    repo_path: str,  # full path to the repo
    repo_config: BaseRepoConfig | None = None,
    bot_commit: bool = True,
) -> None:
    """Set up an operator for a git repository located on local disk.

    The target directory is created when missing and ``GitCLI.init`` is run
    on it before control is handed to the base-class initializer.

    Args:
        repo_path: Full path to the repository directory.
        repo_config: Repository configuration. When omitted (or falsy), a
            default ``BaseRepoConfig()`` is used.
        bot_commit: Forwarded unchanged to the base-class initializer.
    """
    self._repo_path = repo_path
    self._repo_name = os.path.basename(repo_path)
    # NOTE(review): `self.repo_path` is presumably an accessor backed by
    # `_repo_path`; it is defined outside this view -- confirm.
    os.makedirs(self.repo_path, exist_ok=True)
    GitCLI.init(self.repo_path)
    # Fall back to a default config so callers only need to supply a path.
    repo_config = repo_config or BaseRepoConfig()
    super().__init__(repo_config, self.repo_path, bot_commit)

####################################################################################################################
Expand Down
27 changes: 27 additions & 0 deletions src/codegen/sdk/codebase/config.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
import os
from typing import Self

from pydantic import BaseModel, ConfigDict, Field

from codegen.git.repo_operator.local_repo_operator import LocalRepoOperator
from codegen.git.repo_operator.repo_operator import RepoOperator
from codegen.sdk.enums import ProgrammingLanguage
from codegen.sdk.secrets import Secrets
from codegen.sdk.utils import determine_project_language, split_git_path

HARD_MAX_AI_LIMIT = 500 # Global limit for AI requests

Expand Down Expand Up @@ -55,6 +60,28 @@ class ProjectConfig(BaseModel):
subdirectories: list[str] | None = None
programming_language: ProgrammingLanguage = ProgrammingLanguage.PYTHON

@classmethod
def from_path(cls, path: str, programming_language: ProgrammingLanguage | None = None) -> Self:
    """Build a project configuration from a filesystem path.

    The path is made absolute and split into ``(git_root, base_path)`` via
    ``split_git_path``. A ``LocalRepoOperator`` is created for the git root,
    and ``base_path`` (when non-empty) becomes the single subdirectory.

    Args:
        path: Path to the project; may be relative.
        programming_language: Language to use. When not given, it is
            detected with ``determine_project_language`` on the absolute path.

    Returns:
        A new instance of ``cls`` describing the project at ``path``.
    """
    absolute_path = os.path.abspath(path)
    git_root, base_path = split_git_path(absolute_path)

    # Construct the operator before detecting the language, matching the
    # order in which the keyword arguments were originally evaluated.
    operator = LocalRepoOperator(repo_path=git_root)
    language = programming_language or determine_project_language(absolute_path)

    if base_path:
        subdirectories = [base_path]
    else:
        subdirectories = None

    return cls(
        repo_operator=operator,
        programming_language=language,
        base_path=base_path,
        subdirectories=subdirectories,
    )

@classmethod
def from_repo_operator(cls, repo_operator: RepoOperator, programming_language: ProgrammingLanguage | None = None, base_path: str | None = None) -> Self:
    """Build a project configuration around an existing repo operator.

    Args:
        repo_operator: Operator for the repository backing this project.
        programming_language: Language to use. When not given, it is
            detected with ``determine_project_language`` on
            ``repo_operator.repo_path``.
        base_path: Optional base path; when non-empty it also becomes the
            single entry of ``subdirectories``.

    Returns:
        A new instance of ``cls`` wrapping ``repo_operator``.
    """
    language = programming_language or determine_project_language(repo_operator.repo_path)

    if base_path:
        subdirectories = [base_path]
    else:
        subdirectories = None

    return cls(
        repo_operator=repo_operator,
        programming_language=language,
        base_path=base_path,
        subdirectories=subdirectories,
    )


class CodebaseConfig(BaseModel):
"""Configuration for a Codebase. There can be 1 -> many codebases in a single repo
Expand Down
35 changes: 17 additions & 18 deletions src/codegen/sdk/core/codebase.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
from codegen.git.repo_operator.remote_repo_operator import RemoteRepoOperator
from codegen.git.repo_operator.repo_operator import RepoOperator
from codegen.git.schemas.enums import CheckoutResult
from codegen.git.schemas.repo_config import BaseRepoConfig
from codegen.sdk._proxy import proxy_property
from codegen.sdk.ai.helpers import AbstractAIHelper, MultiProviderAIHelper
from codegen.sdk.codebase.codebase_ai import generate_system_prompt, generate_tools
Expand Down Expand Up @@ -74,7 +73,6 @@
from codegen.sdk.typescript.statements.import_statement import TSImportStatement
from codegen.sdk.typescript.symbol import TSSymbol
from codegen.sdk.typescript.type_alias import TSTypeAlias
from codegen.sdk.utils import determine_project_language, split_git_path
from codegen.shared.decorators.docs import apidoc, noapidoc, py_noapidoc
from codegen.shared.exceptions.control_flow import MaxAIRequestsError
from codegen.shared.performance.stopwatch_utils import stopwatch
Expand Down Expand Up @@ -119,7 +117,8 @@ def __init__(
self,
repo_path: None = None,
*,
projects: list[ProjectConfig],
programming_language: None = None,
projects: list[ProjectConfig] | ProjectConfig,
config: CodebaseConfig = DefaultConfig,
) -> None: ...

Expand All @@ -128,6 +127,7 @@ def __init__(
self,
repo_path: str,
*,
programming_language: ProgrammingLanguage,
projects: None = None,
config: CodebaseConfig = DefaultConfig,
) -> None: ...
Expand All @@ -136,7 +136,8 @@ def __init__(
self,
repo_path: str | None = None,
*,
projects: list[ProjectConfig] | None = None,
programming_language: ProgrammingLanguage | None = None,
projects: list[ProjectConfig] | ProjectConfig | None = None,
config: CodebaseConfig = DefaultConfig,
) -> None:
# Sanity check inputs
Expand All @@ -146,19 +147,16 @@ def __init__(
if repo_path is None and projects is None:
raise ValueError("Must specify either repo_path or projects")

if projects is not None and programming_language is not None:
raise ValueError("Cannot specify both projects and programming_language. Use ProjectConfig.from_path() to create projects with a custom programming_language.")

# If projects is a single ProjectConfig, convert it to a list
if isinstance(projects, ProjectConfig):
projects = [projects]

# Initialize project with repo_path if projects is None
if repo_path is not None:
# Split repo_path into (git_root, base_path)
repo_path = os.path.abspath(repo_path)
git_root, base_path = split_git_path(repo_path)
# Create repo_config
repo_config = BaseRepoConfig()
# Create main project
main_project = ProjectConfig(
repo_operator=LocalRepoOperator(repo_config=repo_config, repo_path=git_root),
programming_language=determine_project_language(repo_path),
base_path=base_path,
)
main_project = ProjectConfig.from_path(repo_path, programming_language=programming_language)
projects = [main_project]
else:
main_project = projects[0]
Expand Down Expand Up @@ -1125,14 +1123,16 @@ def set_session_options(self, **kwargs: Unpack[SessionOptions]) -> None:
self.G.transaction_manager.reset_stopwatch(self.G.session_options.max_seconds)

@classmethod
def from_repo(cls, repo_name: str, *, tmp_dir: str | None = None, commit: str | None = None, shallow: bool = True) -> "Codebase":
def from_repo(cls, repo_name: str, *, tmp_dir: str | None = None, commit: str | None = None, shallow: bool = True, programming_language: ProgrammingLanguage | None = None) -> "Codebase":
"""Fetches a codebase from GitHub and returns a Codebase instance.

Args:
repo_name (str): The name of the repository in format "owner/repo"
tmp_dir (Optional[str]): The directory to clone the repo into. Defaults to /tmp/codegen
commit (Optional[str]): The specific commit hash to clone. Defaults to HEAD
shallow (bool): Whether to do a shallow clone. Defaults to True
programming_language (ProgrammingLanguage | None): The programming language of the repo. Defaults to None.

Returns:
Codebase: A Codebase instance initialized with the cloned repository
"""
Expand Down Expand Up @@ -1163,15 +1163,14 @@ def from_repo(cls, repo_name: str, *, tmp_dir: str | None = None, commit: str |
# Ensure the operator can handle remote operations
repo_operator = LocalRepoOperator.create_from_commit(
repo_path=repo_path,
default_branch="main", # We'll get the actual default branch after clone
commit=commit,
url=repo_url,
)
logger.info("Clone completed successfully")

# Initialize and return codebase with proper context
logger.info("Initializing Codebase...")
project = ProjectConfig(repo_operator=repo_operator, programming_language=determine_project_language(repo_path))
project = ProjectConfig.from_repo_operator(repo_operator=repo_operator, programming_language=programming_language)
codebase = Codebase(projects=[project], config=DefaultConfig)
logger.info("Codebase initialization complete")
return codebase
Expand Down