Skip to content

Commit 40fcf62

Browse files
Merge branch 'develop' into api-reference-v0
2 parents c5794e4 + 8252eb1 commit 40fcf62

File tree

5 files changed

+119
-11
lines changed

5 files changed

+119
-11
lines changed

docs/building-with-codegen/parsing-codebases.mdx

Lines changed: 37 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,9 @@ iconType: "solid"
77

88
The primary entrypoint to programs leveraging Codegen is the [Codebase](../api-reference/core/Codebase) class.
99

10-
Construct a Codebase by passing in a path to a local `git` repository.
11-
10+
## Local Codebases
1211

12+
Construct a Codebase by passing in a path to a local `git` repository.
1313

1414
```python
1515
from codegen import Codebase
@@ -20,13 +20,46 @@ codebase = Codebase("path/to/repository")
2020
# Parse from current directory
2121
codebase = Codebase("./")
2222
```
23-
<Note>This will automatically infer the programming language of the codebase and parse all files in the codebase.</Note>
2423

25-
<Tip>The initial parse may take a few minutes for large codebases. This pre-computation enables constant-time operations afterward. [Learn more here.](/introduction/how-it-works)</Tip>
24+
<Note>
25+
This will automatically infer the programming language of the codebase and
26+
parse all files in the codebase.
27+
</Note>
28+
29+
<Tip>
30+
The initial parse may take a few minutes for large codebases. This
31+
pre-computation enables constant-time operations afterward. [Learn more
32+
here.](/introduction/how-it-works)
33+
</Tip>
34+
35+
## Remote Repositories
36+
37+
To fetch and parse a repository directly from GitHub, use the `fetch_codebase` function.
38+
39+
```python
40+
import codegen
41+
42+
# Fetch and parse a repository (defaults to /tmp/codegen/{repo_name})
43+
codebase = codegen.fetch_codebase('fastapi/fastapi')
44+
45+
# Customize temp directory, clone depth, or specific commit
46+
codebase = codegen.fetch_codebase(
47+
'fastapi/fastapi',
48+
tmp_dir='/custom/temp/dir', # Optional: custom temp directory
49+
shallow=False, # Optional: full clone instead of shallow
50+
commit_hash='fe513719ea98abade167d8a89e92f600d9d8f0e5' # Optional: specific commit
51+
)
52+
```
53+
54+
<Note>
55+
Remote repositories are cloned to the `/tmp/codegen/{repo_name}` directory by
56+
default. The clone is shallow by default for better performance.
57+
</Note>
2658

2759
## Supported Languages
2860

2961
Codegen currently supports:
62+
3063
- [Python](/api-reference/python)
3164
- [TypeScript/JavaScript](/api-reference/javascript)
3265
- [React/JSX](/building-with-codegen/react-and-jsx)

src/codemods/canonical/pivot_return_types/pivot_return_types.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,9 @@ def f() -> FastStr:
3535
def execute(self, codebase: Codebase) -> None:
3636
# Iterate over all functions in the codebase
3737
for function in codebase.functions:
38-
# Check if the function's return type annotation is 'BillPayVendor'
38+
# Check if the function's return type annotation is 'str'
3939
if (return_type := function.return_type) and return_type.source == "str":
40-
# Update the return type to 'Payee'
40+
# Update the return type to 'FastStr'
4141
function.set_return_type("FastStr")
4242

4343
# Add import for 'FastStr' if it doesn't exist

src/codemods/canonical/rename_local_variables/rename_local_variables.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ def execute(self, codebase: Codebase) -> None:
4242
for file in codebase.files:
4343
for function in file.functions:
4444
# Check if any local variable names contain "position"
45-
business_vendor_usages = function.code_block.get_variable_usages("position", fuzzy_match=True)
46-
if len(business_vendor_usages) > 0:
45+
position_usages = function.code_block.get_variable_usages("position", fuzzy_match=True)
46+
if len(position_usages) > 0:
4747
# Rename
4848
function.rename_local_variable("position", "pos", fuzzy_match=True)

src/codemods/canonical/swap_class_attribute_usages/swap_class_attribute_usages.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,14 +44,14 @@ def execute(self, codebase: Codebase) -> None:
4444
# Add import of `CacheConfig` to function definition file
4545
function.file.add_symbol_import(class_b_symb)
4646

47-
# Check if the function body is using `bill_pay_vendor`
47+
# Check if the function body is using `graph_rag_config`
4848
if len(function.code_block.get_variable_usages(class_a_param.name)) > 0:
4949
# Add "wrapper" inside the function
50-
# This creates the `business_vendor` variable internally
50+
# This creates the `graph_rag_config` variable internally
5151
proxy_var_declaration = f"""{class_a_param.name} = cache_config.settings # added by Codegen"""
5252
function.prepend_statements(proxy_var_declaration)
5353

54-
# Update all callsites of original function to take in `payee` instead of `bill_pay_vendor`
54+
# Update all callsites of original function to take in `cache_config` instead of `graph_rag_config`
5555
fcalls = function.call_sites
5656
for fcall in fcalls:
5757
arg = fcall.get_arg_by_parameter_name(class_a_param.name)

src/graph_sitter/fetch_codebase.py

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
import logging
2+
import os
3+
4+
from codegen_git.repo_operator.local_repo_operator import LocalRepoOperator
5+
from graph_sitter.codebase.config import DefaultConfig, ProjectConfig
6+
from graph_sitter.core.codebase import Codebase
7+
from graph_sitter.utils import determine_project_language
8+
9+
logger = logging.getLogger(__name__)
10+
11+
DEFAULT_CODEGEN_DIR = "/tmp/codegen"
12+
13+
14+
def fetch_codebase(repo_name: str, *, tmp_dir: str | None = None, shallow: bool = True, commit_hash: str | None = None) -> Codebase:
    """Fetches a codebase from GitHub and returns a Codebase instance.

    Args:
        repo_name (str): The name of the repository in format "owner/repo"
        tmp_dir (Optional[str]): The directory to clone the repo into. The repository is
            placed in ``<tmp_dir>/<repo>``; a leading ``~`` is expanded to the user's home
            directory. Defaults to /tmp/codegen
        shallow (bool): Whether to do a shallow clone. Defaults to True.
            NOTE(review): this flag is accepted but is not forwarded to the clone call
            in this function - confirm whether the repo operator should receive it.
        commit_hash (Optional[str]): The specific commit hash to clone. Defaults to HEAD

    Returns:
        Codebase: A Codebase instance initialized with the cloned repository

    Raises:
        ValueError: If ``repo_name`` is not exactly of the form "owner/repo".
        Exception: Any error raised while cloning or initializing the codebase is
            logged (with traceback) and re-raised unchanged.

    Example:
        ```python
        import graph_sitter
        import logging
        # Enable logging to see progress
        logging.basicConfig(level=logging.INFO)
        # Clone a repository to default location (/tmp/codegen)
        codebase = graph_sitter.fetch_codebase('facebook/react')
        # Or specify a custom directory
        codebase = graph_sitter.fetch_codebase('facebook/react', tmp_dir='~/my_repos')
        # Or clone a specific commit
        codebase = graph_sitter.fetch_codebase('facebook/react', commit_hash='abc123')
        ```
    """
    logger.info("Fetching codebase for %s", repo_name)

    # Parse repo name: require exactly one "/" separating two non-empty parts.
    # An empty repo part would make repo_path collapse to tmp_dir itself, and
    # "." / ".." would point the clone outside of (or at) tmp_dir.
    owner, separator, repo = repo_name.partition("/")
    if not separator or not owner or not repo or "/" in repo or repo in (".", ".."):
        raise ValueError("repo_name must be in format 'owner/repo'")

    # Setup temp directory. expanduser makes paths such as '~/my_repos' (used in
    # the example above) resolve to the home directory instead of a literal '~'.
    tmp_dir = os.path.expanduser(DEFAULT_CODEGEN_DIR if tmp_dir is None else tmp_dir)
    os.makedirs(tmp_dir, exist_ok=True)
    logger.info("Using directory: %s", tmp_dir)

    # Setup repo path and URL
    repo_path = os.path.join(tmp_dir, repo)
    repo_url = f"https://github.com/{repo_name}.git"
    logger.info("Will clone %s to %s", repo_url, repo_path)

    try:
        # Use LocalRepoOperator to fetch the repository
        logger.info("Cloning repository...")
        repo_operator = LocalRepoOperator.create_from_commit(
            repo_path=repo_path,
            default_branch="main",  # We'll get the actual default branch after clone
            commit=commit_hash or "HEAD",
            url=repo_url,
        )
        logger.info("Clone completed successfully")

        # Initialize and return codebase with proper context
        logger.info("Initializing Codebase...")
        project = ProjectConfig(repo_operator=repo_operator, programming_language=determine_project_language(repo_path))
        codebase = Codebase(projects=[project], config=DefaultConfig)
        logger.info("Codebase initialization complete")
        return codebase
    except Exception as e:
        # logger.exception keeps the original message and also records the traceback.
        logger.exception("Failed to initialize codebase: %s", e)
        raise

0 commit comments

Comments
 (0)