Skip to content

Commit 583a2f3

Browse files
committed
add: codegen's modal run to postgresql/grafana
1 parent f4698c1 commit 583a2f3

File tree

18 files changed

+465
-81
lines changed

18 files changed

+465
-81
lines changed

codegen-on-oss/Dockerfile

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,15 @@ RUN apt-get update && apt-get install -y git \
1818

1919
FROM python:3.12-slim
2020

21-
ENV PATH="/venv/bin:$PATH"
21+
ENV PATH="/venv/bin:/app/scripts:$PATH"
2222
# Copy the project into the image
2323
COPY --from=installer /app/.venv/ /venv
2424

25+
RUN apt-get update && apt-get install -y postgresql-client \
26+
&& rm -rf /var/lib/apt/lists/* \
27+
&& apt-get clean
28+
29+
2530
WORKDIR /app
2631

2732
COPY . .
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
import sys
2+
from pathlib import Path
3+
4+
import modal
5+
from loguru import logger
6+
7+
from codegen_on_oss.cache import cachedir
8+
from codegen_on_oss.metrics import MetricsProfiler
9+
from codegen_on_oss.outputs.sql_output import PostgresSQLOutput
10+
from codegen_on_oss.parser import CodegenParser
11+
12+
app = modal.App("codegen-oss-parse")
13+
14+
15+
codegen_repo_volume = modal.Volume.from_name(
16+
"codegen-oss-repo-volume",
17+
create_if_missing=True,
18+
)
19+
20+
21+
aws_secrets = modal.Secret.from_name(
22+
"codegen-oss-parse-secrets",
23+
)
24+
25+
26+
@app.function(
    name="parse_repo",
    concurrency_limit=10,
    cpu=4,
    memory=16384,
    timeout=3600 * 8,
    secrets=[aws_secrets],
    volumes={
        str(cachedir.absolute()): codegen_repo_volume,
    },
    proxy=modal.Proxy.from_name("codegen-parse-proxy"),
    image=modal.Image.debian_slim(python_version="3.13")
    .pip_install("uv")
    .apt_install("git")  # required by codegen sdk
    .env({"PATH": "/app/.venv/bin:$PATH"})
    .workdir("/app")
    .add_local_file("uv.lock", remote_path="/app/uv.lock", copy=True)
    .add_local_file("pyproject.toml", remote_path="/app/pyproject.toml", copy=True)
    .run_commands("uv sync --frozen --no-install-project --extra sql")
    .add_local_python_source("codegen_on_oss", copy=True),
)
def parse_repo(
    repo_url: str,
    commit_hash: str | None,
    language: str | None = None,
):
    """
    Parse a single repository on Modal and record its metrics in Postgres.

    Parse failures are logged and swallowed so that the cache volume is
    always committed; nothing is returned to the caller.

    Args:
        repo_url: The URL of the repository to parse.
        commit_hash: The commit hash of the repository to parse, or None.
        language: Optional language forwarded to the parser, or None.
    """
    # Keep the sink id so the sink can be detached on exit. Without this,
    # every call handled by the same process would register another stdout
    # sink and each log line would be printed once per previous call.
    sink_id = logger.add(
        sys.stdout, format="{time: HH:mm:ss} {level} {message}", level="DEBUG"
    )
    try:
        output = PostgresSQLOutput(
            modal_function_call_id=modal.current_function_call_id()
        )
        metrics_profiler = MetricsProfiler(output)
        parser = CodegenParser(Path(cachedir) / "repositories", metrics_profiler)
        # Refresh any updating repo data from other instances
        codegen_repo_volume.reload()
        try:
            # NOTE(review): argument order here is (url, language, commit_hash),
            # while the CLI calls parse(url, commit_hash) -- confirm against
            # CodegenParser.parse's signature.
            parser.parse(repo_url, language, commit_hash)
        except Exception as e:
            # Deliberate best-effort: record the failure and fall through so
            # the volume commit below still happens.
            logger.exception(f"Error parsing repository {repo_url}: {e}")
        finally:
            # Commit any cache changes to the repo volume
            codegen_repo_volume.commit()
    finally:
        logger.remove(sink_id)
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
import modal
2+
3+
from codegen_on_oss.sources import GithubSettings, GithubSource
4+
5+
app = modal.App("codegen-oss-parse")
6+
7+
8+
@app.local_entrypoint()
def main(
    languages: str = "python,typescript",
    heuristic: str = "stars",
    num_repos: int = 100,
):
    """
    Main entrypoint for the parse app.

    For each language, iterate a GithubSource and spawn one remote
    ``parse_repo`` call per repository it yields.

    Args:
        languages: Comma-separated language names; surrounding whitespace
            is ignored and blank entries are skipped.
        heuristic: Forwarded to GithubSettings as ``heuristic``.
        num_repos: Forwarded to GithubSettings as ``num_repos``.
    """
    parse_repo_on_modal_fn = modal.Function.from_name("codegen-oss-parse", "parse_repo")
    for raw_language in languages.split(","):
        # Normalise once so the value used to select repositories is the
        # same one handed to the remote parser ("python, typescript" used to
        # send " typescript" to the worker).
        language = raw_language.strip()
        if not language:
            # Tolerate stray commas such as "python," or "python,,typescript".
            continue
        repo_source = GithubSource(
            GithubSettings(
                language=language, heuristic=heuristic, num_repos=num_repos
            )
        )
        for repo_url, commit_hash in repo_source:
            parse_repo_on_modal_fn.spawn(
                repo_url=repo_url,
                commit_hash=commit_hash,
                language=language,
            )
Lines changed: 10 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from datetime import datetime
12
from importlib.metadata import version
23
from typing import TYPE_CHECKING
34

@@ -6,35 +7,20 @@
67
if TYPE_CHECKING:
78
from types_boto3_s3 import S3Client
89

9-
from codegen_on_oss.sources import RepoSource
10-
1110

1211
class BucketStore:
1312
s3_client: "S3Client"
1413

1514
def __init__(self, bucket_name: str):
1615
self.bucket_name = bucket_name
1716
self.s3_client = client("s3")
18-
19-
def upload_run(
20-
self,
21-
repo_source: "RepoSource",
22-
log_output_path: str,
23-
metrics_output_path: str,
24-
):
25-
codegen_version = str(version("codegen"))
26-
key_prefix: str = f"{codegen_version}/{repo_source.source_type}"
27-
config_key = f"{key_prefix}/config.json"
28-
29-
self.s3_client.put_object(
30-
Bucket=self.bucket_name,
31-
Key=config_key,
32-
Body=repo_source.settings.model_dump_json(indent=4).encode("utf-8"),
33-
ContentType="application/json",
17+
self.key_prefix: str = str(version("codegen"))
18+
19+
def upload_file(self, local_path: str, remote_path: str) -> str:
20+
key = f"{self.key_prefix}/{datetime.now().strftime('%Y-%m-%d-%H-%M-%S')}/{remote_path}"
21+
self.s3_client.upload_file(
22+
local_path,
23+
self.bucket_name,
24+
key,
3425
)
35-
36-
log_key = f"{key_prefix}/output.logs"
37-
self.s3_client.upload_file(log_output_path, self.bucket_name, log_key)
38-
39-
metrics_key = f"{key_prefix}/metrics.csv"
40-
self.s3_client.upload_file(metrics_output_path, self.bucket_name, metrics_key)
26+
return key

codegen-on-oss/codegen_on_oss/cli.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
from codegen_on_oss.cache import cachedir
88
from codegen_on_oss.metrics import MetricsProfiler
9+
from codegen_on_oss.outputs.csv_output import CSVOutput
910
from codegen_on_oss.parser import CodegenParser
1011
from codegen_on_oss.sources import RepoSource, all_sources
1112

@@ -60,7 +61,8 @@ def run_one(
6061
"""
6162
logger.add(error_output_path, level="ERROR")
6263
logger.add(sys.stdout, level="DEBUG" if debug else "INFO")
63-
metrics_profiler = MetricsProfiler(output_path)
64+
output = CSVOutput(MetricsProfiler.fields(), output_path)
65+
metrics_profiler = MetricsProfiler(output)
6466

6567
parser = CodegenParser(Path(cache_dir) / "repositories", metrics_profiler)
6668
parser.parse(url, commit_hash)
@@ -115,7 +117,8 @@ def run(
115117
)
116118

117119
repo_source = RepoSource.from_source_type(source)
118-
metrics_profiler = MetricsProfiler(output_path)
120+
output = CSVOutput(MetricsProfiler.fields(), output_path)
121+
metrics_profiler = MetricsProfiler(output)
119122
parser = CodegenParser(Path(cache_dir) / "repositories", metrics_profiler)
120123
for repo_url, commit_hash in repo_source:
121124
parser.parse(repo_url, commit_hash)

0 commit comments

Comments
 (0)