Skip to content

Commit 9049f1d

Browse files
authored
feat: codegen parse oss repos via CLI and modal (#545)
# Motivation The **Codegen on OSS** package provides a pipeline that: - **Collects repository URLs** from different sources (e.g., CSV files or GitHub searches). - **Parses repositories** using the codegen tool. - **Profiles performance** and logs metrics for each parsing run. - **Logs errors** to help pinpoint parsing failures or performance bottlenecks. <!-- Why is this change necessary? --> # Content <!-- Please include a summary of the change --> see [codegen-on-oss/README.md](https://github.com/codegen-sh/codegen-sdk/blob/acfe3dc07b65670af33b977fa1e7bc8627fd714e/codegen-on-oss/README.md) # Testing <!-- How was the change tested? --> `uv run modal run modal_run.py` No unit tests yet 😿 # Please check the following before marking your PR as ready for review - [ ] I have added tests for my changes - [x] I have updated the documentation or added new documentation as needed
1 parent cdcf2d0 commit 9049f1d

33 files changed

+5036
-0
lines changed

codegen-on-oss/.dockerignore

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Paths excluded from the Docker build context. The Dockerfile ends with
# `COPY . .`, so anything not listed here is copied into the image.

# Version-control metadata and editor settings
.git/
.vscode/

# Local working data: cloned repositories and generated output
repositories/
output/

# Local virtual environment; the image builds its own
.venv/

# Local environment file (git-ignored). It may hold credentials and must not
# be baked into image layers.
.env

codegen-on-oss/.gitignore

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
docs/source
2+
3+
# From https://raw.githubusercontent.com/github/gitignore/main/Python.gitignore
4+
5+
# Byte-compiled / optimized / DLL files
6+
__pycache__/
7+
*.py[cod]
8+
*$py.class
9+
10+
# C extensions
11+
*.so
12+
13+
# Distribution / packaging
14+
.Python
15+
build/
16+
develop-eggs/
17+
dist/
18+
downloads/
19+
eggs/
20+
.eggs/
21+
lib/
22+
lib64/
23+
parts/
24+
sdist/
25+
var/
26+
wheels/
27+
share/python-wheels/
28+
*.egg-info/
29+
.installed.cfg
30+
*.egg
31+
MANIFEST
32+
33+
# PyInstaller
34+
# Usually these files are written by a python script from a template
35+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
36+
*.manifest
37+
*.spec
38+
39+
# Installer logs
40+
pip-log.txt
41+
pip-delete-this-directory.txt
42+
43+
# Unit test / coverage reports
44+
htmlcov/
45+
.tox/
46+
.nox/
47+
.coverage
48+
.coverage.*
49+
.cache
50+
nosetests.xml
51+
coverage.xml
52+
*.cover
53+
*.py,cover
54+
.hypothesis/
55+
.pytest_cache/
56+
cover/
57+
58+
# Translations
59+
*.mo
60+
*.pot
61+
62+
# Django stuff:
63+
*.log
64+
local_settings.py
65+
db.sqlite3
66+
db.sqlite3-journal
67+
68+
# Flask stuff:
69+
instance/
70+
.webassets-cache
71+
72+
# Scrapy stuff:
73+
.scrapy
74+
75+
# Sphinx documentation
76+
docs/_build/
77+
78+
# PyBuilder
79+
.pybuilder/
80+
target/
81+
82+
# Jupyter Notebook
83+
.ipynb_checkpoints
84+
85+
# IPython
86+
profile_default/
87+
ipython_config.py
88+
89+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
90+
__pypackages__/
91+
92+
# Celery stuff
93+
celerybeat-schedule
94+
celerybeat.pid
95+
96+
# SageMath parsed files
97+
*.sage.py
98+
99+
# Environments
100+
.env
101+
.venv
102+
env/
103+
venv/
104+
ENV/
105+
env.bak/
106+
venv.bak/
107+
108+
# Spyder project settings
109+
.spyderproject
110+
.spyproject
111+
112+
# Rope project settings
113+
.ropeproject
114+
115+
# mkdocs documentation
116+
/site
117+
118+
# mypy
119+
.mypy_cache/
120+
.dmypy.json
121+
dmypy.json
122+
123+
# Pyre type checker
124+
.pyre/
125+
126+
# pytype static type analyzer
127+
.pytype/
128+
129+
# Cython debug symbols
130+
cython_debug/
131+
132+
# Vscode config files
133+
.vscode/
134+
135+
# PyCharm
136+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
137+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
138+
# and can be added to the global gitignore or merged into this file. For a more nuclear
139+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
140+
#.idea/

codegen-on-oss/Dockerfile

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# ---- Stage 1: install the locked dependencies with uv ----------------------
FROM python:3.12-slim AS installer
COPY --from=ghcr.io/astral-sh/uv:latest /uv /bin/uv

# Create the virtual environment at the same absolute path it will have in the
# runtime image. Console scripts generated inside a venv embed the absolute
# path of the venv interpreter in their shebang, so an environment built at
# /app/.venv and later copied to /venv would ship entry points that fail with
# "bad interpreter".
ENV UV_PROJECT_ENVIRONMENT=/venv

# Change the working directory to the `app` directory
WORKDIR /app

# Copy only the lockfile and `pyproject.toml` so this layer is rebuilt only
# when the dependency set changes, not on every source edit.
COPY uv.lock /app/uv.lock
COPY pyproject.toml /app/pyproject.toml

# Install dependencies (not the project itself) exactly as pinned in uv.lock.
# NOTE(review): git is presumably needed here for VCS-sourced dependencies;
# confirm against pyproject.toml.
RUN apt-get update && apt-get install -y git \
    && uv sync --frozen --no-install-project \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*


# ---- Stage 2: runtime image -------------------------------------------------
FROM python:3.12-slim

# Put the virtual environment (and the project's scripts directory) first on
# PATH so `python` and installed console scripts resolve to the venv.
ENV PATH="/venv/bin:/app/scripts:$PATH"

# Bring over the prepared environment at an identical path (see stage 1).
COPY --from=installer /venv /venv

# NOTE(review): git is only installed in the installer stage. If the runtime
# needs to clone repositories with the git CLI, it must be installed here too.
RUN apt-get update && apt-get install -y postgresql-client \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Copy the project into the image (filtered by .dockerignore)
COPY . .

CMD ["python", "modal_run.py"]

codegen-on-oss/Makefile

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# Developer task runner. Every command goes through `uv`, so the only
# prerequisite on the host is uv itself. Running plain `make` prints the list
# of documented targets.
.DEFAULT_GOAL := help

# None of the targets below produce a file named after themselves.
.PHONY: install check test build clean-build help

# Text after `##` on a target line is what the `help` target prints for it.

install: ## Install the virtual environment and install the pre-commit hooks
	@echo "🚀 Creating virtual environment using uv"
	@uv sync
	@uv run pre-commit install

check: ## Run code quality tools.
	@echo "🚀 Checking lock file consistency with 'pyproject.toml'"
	@uv lock --locked
	@echo "🚀 Linting code: Running pre-commit"
	@uv run pre-commit run -a
	@echo "🚀 Static type checking: Running mypy"
	@uv run mypy
	@echo "🚀 Checking for obsolete dependencies: Running deptry"
	@uv run deptry .

test: ## Test the code with pytest
	@echo "🚀 Testing code: Running pytest"
	@uv run python -m pytest --cov --cov-config=pyproject.toml --cov-report=xml

# `build` always starts from an empty dist/ directory via clean-build.
build: clean-build ## Build wheel file
	@echo "🚀 Creating wheel file"
	@uvx --from build pyproject-build --installer uv

# Uses Python rather than `rm -rf` to delete dist/ if it exists.
clean-build: ## Clean build artifacts
	@echo "🚀 Removing build artifacts"
	@uv run python -c "import shutil; import os; shutil.rmtree('dist') if os.path.exists('dist') else None"

# Scans every file in MAKEFILE_LIST for `target: ... ## description` lines and
# prints the target name, padded to 20 columns in cyan, followed by the
# description. `$$` is make's escape for a literal `$` (the regex end anchor).
help:
	@uv run python -c "import re; \
	[[print(f'\033[36m{m[0]:<20}\033[0m {m[1]}') for m in re.findall(r'^([a-zA-Z_-]+):.*?## (.*)$$', open(makefile).read(), re.M)] for makefile in ('$(MAKEFILE_LIST)').strip().split()]"

0 commit comments

Comments
 (0)