Skip to content

Commit 4177e08

Browse files
committed
convert to modal.Dict snapshot manager
1 parent 3a8a2cd commit 4177e08

File tree

3 files changed

+187
-11
lines changed

3 files changed

+187
-11
lines changed

codegen-examples/examples/swebench_agent_run/run_eval.py

Lines changed: 56 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,18 @@
11
import asyncio
22
import json
33
import traceback
4-
from pathlib import Path
54
import uuid
6-
import modal
7-
import click
85
from datetime import datetime
9-
from codegen.extensions.swebench.utils import SWEBenchDataset, SweBenchExample, get_swe_bench_examples
6+
from pathlib import Path
7+
8+
import click
9+
import modal
1010
from codegen.extensions.swebench.report import generate_report
11+
from codegen.extensions.swebench.utils import (
12+
SWEBenchDataset,
13+
SweBenchExample,
14+
get_swe_bench_examples,
15+
)
1116

1217
PREDS_DNAME = Path(__file__).parent / "predictions"
1318
LOG_DIR = Path(__file__).parent / "logs"
@@ -61,11 +66,26 @@ async def process_batch(examples: list[SweBenchExample], batch_size=10):
6166
print("Traceback:")
6267
print("".join(error_info["traceback"]))
6368

64-
results.append({"instance_id": example.instance_id, "status": "error", "error_info": error_info})
69+
results.append(
70+
{
71+
"instance_id": example.instance_id,
72+
"status": "error",
73+
"error_info": error_info,
74+
}
75+
)
6576
else:
6677
if result is None:
6778
print(f"Warning: Null result for {example.instance_id}")
68-
results.append({"instance_id": example.instance_id, "status": "error", "error_info": {"error_type": "NullResult", "error_message": "Process returned None"}})
79+
results.append(
80+
{
81+
"instance_id": example.instance_id,
82+
"status": "error",
83+
"error_info": {
84+
"error_type": "NullResult",
85+
"error_message": "Process returned None",
86+
},
87+
}
88+
)
6989
else:
7090
results.append(result)
7191

@@ -81,14 +101,24 @@ async def process_batch(examples: list[SweBenchExample], batch_size=10):
81101
{
82102
"instance_id": example.instance_id,
83103
"status": "error",
84-
"error_info": {"error_type": type(e).__name__, "error_message": str(e), "traceback": traceback.format_exc(), "batch_failure": True},
104+
"error_info": {
105+
"error_type": type(e).__name__,
106+
"error_message": str(e),
107+
"traceback": traceback.format_exc(),
108+
"batch_failure": True,
109+
},
85110
}
86111
)
87112

88113
return results
89114

90115

91-
async def run_eval(use_existing_preds: str | None, dataset: str, length: int, instance_id: str | None = None):
116+
async def run_eval(
117+
use_existing_preds: str | None,
118+
dataset: str,
119+
length: int,
120+
instance_id: str | None = None,
121+
):
92122
run_id = use_existing_preds or str(uuid.uuid4())
93123
predictions_dir = PREDS_DNAME / f"results_{run_id}"
94124
dataset = SWEBenchDataset(dataset)
@@ -155,10 +185,25 @@ async def run_eval(use_existing_preds: str | None, dataset: str, length: int, in
155185

156186

157187
@click.command()
158-
@click.option("--use-existing-preds", help="The run ID of the existing predictions to use.", type=str, default=None)
159-
@click.option("--dataset", help="The dataset to use.", type=click.Choice([dataset.value for dataset in SWEBenchDataset]), default=SWEBenchDataset.LITE.value)
188+
@click.option(
189+
"--use-existing-preds",
190+
help="The run ID of the existing predictions to use.",
191+
type=str,
192+
default=None,
193+
)
194+
@click.option(
195+
"--dataset",
196+
help="The dataset to use.",
197+
type=click.Choice([dataset.value for dataset in SWEBenchDataset]),
198+
default=SWEBenchDataset.LITE.value,
199+
)
160200
@click.option("--length", help="The number of examples to process.", type=int, default=10)
161-
@click.option("--instance-id", help="The instance ID of the example to process.", type=str, default=None)
201+
@click.option(
202+
"--instance-id",
203+
help="The instance ID of the example to process.",
204+
type=str,
205+
default=None,
206+
)
162207
def run_eval_command(use_existing_preds, dataset, length, instance_id):
163208
asyncio.run(run_eval(use_existing_preds, dataset, length, instance_id))
164209

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
from contextlib import asynccontextmanager
2+
3+
import modal
4+
from codegen.extensions.swebench.utils import SweBenchExample
5+
6+
from .snapshot_manager import (
7+
ModalDictSnapshotManager,
8+
SnapshotManager,
9+
)
10+
11+
# Base image shared by every sandbox: slim Debian with Python 3.13, with git
# installed via apt (create_sandbox runs git commands on top of this image).
_debian_slim = modal.Image.debian_slim(python_version="3.13")
BASE_IMAGE: modal.Image = _debian_slim.apt_install("git")

# To ensure secrets are consistent across runs, we look up the existing named
# secret first and only fall back to the local .env file when it is missing.
# NOTE(review): if Secret.from_name resolves lazily, NotFoundError would not be
# raised here and the fallback would never run -- confirm against Modal docs.
try:
    secret = modal.Secret.from_name("swebench-agent-run-secrets")
except modal.exception.NotFoundError:
    secret = modal.Secret.from_dotenv()

# Modal app that owns the sandboxes; created on first use if it does not exist.
app = modal.App.lookup(name="swebench-agent-run", create_if_missing=True)
20+
21+
22+
class SandboxManager:
    """Create, reuse, snapshot and tear down Modal sandboxes for SWE-bench examples.

    Sandboxes are tagged with the example's repo and environment-setup commit,
    and the same tags are used to look up an already-listed sandbox to reuse.
    """

    # When True, the sandbox is left untouched (no snapshot, no terminate) on exit.
    keep_alive: bool

    def __init__(
        self,
        keep_alive: bool = False,
        snapshot_manager: SnapshotManager | None = None,
    ):
        """Store the teardown policy and the snapshot-id store.

        Args:
            keep_alive: Skip snapshotting and termination when a sandbox
                context exits.
            snapshot_manager: Store used to look up and save snapshot ids.
                Defaults to a ``ModalDictSnapshotManager``.
        """
        self.keep_alive = keep_alive
        self.snapshot_manager = snapshot_manager or ModalDictSnapshotManager()

    async def create_sandbox(self, example: SweBenchExample) -> modal.Sandbox:
        """Return a sandbox for ``example``, restored from a snapshot if one is saved.

        When no snapshot id is stored, a new sandbox is created from
        ``BASE_IMAGE`` with the repo cloned under ``/root/<repo>`` and checked
        out at ``example.environment_setup_commit``.
        """
        existing_snapshot_uid = await self.snapshot_manager.get_snapshot_uid(example)
        if existing_snapshot_uid:
            return await modal.Sandbox._experimental_from_snapshot(existing_snapshot_uid)

        # The previous command ran `git clone <repo> && cd <repo>`, which cannot
        # work for a remote repo: git names the clone directory after the
        # basename, and teardown expects the checkout at /root/<repo>. Clone
        # into an explicit target directory so both paths agree.
        # NOTE(review): assumes example.repo is a GitHub "owner/name" slug
        # (the SWE-bench convention) -- confirm against SweBenchExample.
        setup_command = (
            "cd /root; "
            f"git clone https://github.com/{example.repo}.git {example.repo} "
            f"&& cd {example.repo} "
            f"&& git checkout {example.environment_setup_commit}"
        )

        # TODO: test if this get local version works / add ability to install specific version
        # NOTE(review): Modal documents `.aio` variants for use inside async
        # code; confirm these bare calls are awaitable in the pinned version.
        with modal.enable_output():
            return await modal.Sandbox.create(
                app=app,
                image=BASE_IMAGE.run_commands(setup_command),
                secrets=[secret],
                tags={"repo": example.repo, "commit": example.environment_setup_commit},
            )

    @asynccontextmanager
    async def get_sandbox(self, example: SweBenchExample):
        """Yield a sandbox for ``example`` and clean it up on exit.

        The first sandbox listed for this app with matching repo/commit tags is
        reused; otherwise one is created. On exit, unless ``keep_alive`` is
        set, the working tree is stashed, a snapshot is taken and its id is
        saved, and the sandbox is terminated.
        """
        async for sandbox in modal.Sandbox.list(
            app_id=app.app_id,
            tags={"repo": example.repo, "commit": example.environment_setup_commit},
        ):
            # Take the first match only.
            break
        else:
            sandbox = await self.create_sandbox(example)

        try:
            # NOTE(review): Modal describes Sandbox.wait() as waiting for the
            # sandbox to finish running -- confirm blocking here before
            # handing the sandbox to the caller is intended.
            await sandbox.wait()
            yield sandbox
        finally:
            if not self.keep_alive:
                try:
                    # Killing sandbox, so take a snapshot and save it
                    await sandbox.exec(
                        "bash",
                        "-c",
                        f"cd /root/{example.repo}; git stash",  # cheeky little stash
                    )
                    snapshot = await sandbox._experimental_snapshot()  # commit any codegen updates

                    await self.snapshot_manager.save_snapshot_uid(example, snapshot.object_id)
                finally:
                    # Always terminate, even if stashing, snapshotting or
                    # saving the id raised; otherwise the sandbox leaks.
                    # Codebase.from_repo doesn't use git to fetch/checkout the repo.
                    # We could replace this with our own git commands to control the file state
                    await sandbox.terminate()
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
import io
2+
import json
3+
from collections import defaultdict
4+
5+
import modal
6+
from codegen.extensions.swebench.utils import SweBenchExample
7+
8+
9+
class SnapshotManager:
    """Interface for storing and retrieving sandbox snapshot ids per example.

    Both methods raise ``NotImplementedError`` here; subclasses override them.
    """

    async def get_snapshot_uid(self, example: SweBenchExample) -> str | None:
        """Return the snapshot id saved for ``example``, or ``None`` if there is none.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError("Not implemented")

    async def save_snapshot_uid(self, example: SweBenchExample, snapshot_uid: str) -> None:
        """Record ``snapshot_uid`` as the snapshot for ``example``.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError("Not implemented")
15+
16+
17+
class VolumeSnapshotManager(SnapshotManager):
    """Store snapshot ids in a JSON file on a Modal Volume.

    The file holds a nested mapping: repo -> environment setup commit ->
    snapshot id.
    """

    def __init__(self, volume_name: str = "swebench-agent-snapshot-volume"):
        """Bind to the named volume, creating it if it does not exist."""
        self.snapshot_volume = modal.Volume.from_name(volume_name, create_if_missing=True)
        self.snapshot_meta_file_path: str = "/root/snapshot_meta.json"

    async def get_snapshot_uid(self, example: SweBenchExample) -> str | None:
        """Return the snapshot id saved for ``example``, or ``None`` if absent.

        The lookup uses ``example.environment_setup_commit``, the same key
        ``save_snapshot_uid`` writes under. It previously read by
        ``example.base_commit``, so saved snapshots were never found.
        """
        snapshot_meta = await self.read_snapshot_meta()
        # .get() avoids inserting placeholder entries into the defaultdicts.
        commits = snapshot_meta.get(example.repo)
        if commits is None:
            return None
        return commits.get(example.environment_setup_commit)

    async def save_snapshot_uid(self, example: SweBenchExample, snapshot_uid: str) -> None:
        """Record ``snapshot_uid`` for ``example`` and rewrite the metadata file."""
        snapshot_meta = await self.read_snapshot_meta()
        snapshot_meta[example.repo][example.environment_setup_commit] = snapshot_uid
        async with self.snapshot_volume.batch_upload() as upload:
            await upload.put_file(
                io.BytesIO(json.dumps(snapshot_meta).encode("utf-8")),
                self.snapshot_meta_file_path,
            )
        await self.snapshot_volume.commit()

    async def read_snapshot_meta(self) -> dict[str, dict[str, str]]:
        """Load the metadata mapping from the volume.

        Returns:
            A ``defaultdict`` of ``defaultdict``s, so unknown repos yield an
            empty inner mapping and unknown commits yield ``None``. A missing
            metadata file yields an empty mapping.
        """
        bytes_io = io.BytesIO()
        try:
            await self.snapshot_volume.read_file_into_fileobj(self.snapshot_meta_file_path, bytes_io)
            raw_meta = json.loads(bytes_io.getvalue().decode("utf-8"))
        except FileNotFoundError:
            raw_meta = {}

        # Wrap the inner dicts too: json.loads returns plain dicts, so wrapping
        # only the outer level raised KeyError for a known repo with an
        # unknown commit.
        snapshot_meta = defaultdict(lambda: defaultdict(lambda: None))
        for repo, commits in raw_meta.items():
            snapshot_meta[repo].update(commits)
        return snapshot_meta
44+
45+
46+
class ModalDictSnapshotManager(SnapshotManager):
    """Store snapshot ids in a named ``modal.Dict``.

    Entries are keyed by the tuple ``(repo, environment_setup_commit)``.
    """

    def __init__(self, name: str = "swebench-agent-snapshot-dict"):
        """Bind to the named ``modal.Dict``, creating it if it does not exist."""
        self.snapshot_dict = modal.Dict.from_name(name, create_if_missing=True)

    async def get_snapshot_uid(self, example: SweBenchExample) -> str | None:
        """Return the snapshot id stored for ``example``, or ``None`` on a missing key."""
        entry_key = (example.repo, example.environment_setup_commit)
        try:
            stored_uid = self.snapshot_dict[entry_key]
        except KeyError:
            stored_uid = None
        return stored_uid

    async def save_snapshot_uid(self, example: SweBenchExample, snapshot_uid: str) -> None:
        """Store ``snapshot_uid`` under the key for ``example``."""
        entry_key = (example.repo, example.environment_setup_commit)
        self.snapshot_dict[entry_key] = snapshot_uid

0 commit comments

Comments
 (0)