convert to modal.Dict snapshot manager

clee-codegen · clee-codegen · commit 4177e082a1c4 · 2025-02-26T23:54:08.000-08:00
diff --git a/codegen-examples/examples/swebench_agent_run/run_eval.py b/codegen-examples/examples/swebench_agent_run/run_eval.py
@@ -1,13 +1,18 @@
 import asyncio
 import json
 import traceback
-from pathlib import Path
 import uuid
-import modal
-import click
 from datetime import datetime
-from codegen.extensions.swebench.utils import SWEBenchDataset, SweBenchExample, get_swe_bench_examples
+from pathlib import Path
+
+import click
+import modal
 from codegen.extensions.swebench.report import generate_report
+from codegen.extensions.swebench.utils import (
+    SWEBenchDataset,
+    SweBenchExample,
+    get_swe_bench_examples,
+)
 
 PREDS_DNAME = Path(__file__).parent / "predictions"
 LOG_DIR = Path(__file__).parent / "logs"
@@ -61,11 +66,26 @@ async def process_batch(examples: list[SweBenchExample], batch_size=10):
                     print("Traceback:")
                     print("".join(error_info["traceback"]))
 
-                    results.append({"instance_id": example.instance_id, "status": "error", "error_info": error_info})
+                    results.append(
+                        {
+                            "instance_id": example.instance_id,
+                            "status": "error",
+                            "error_info": error_info,
+                        }
+                    )
                 else:
                     if result is None:
                         print(f"Warning: Null result for {example.instance_id}")
-                        results.append({"instance_id": example.instance_id, "status": "error", "error_info": {"error_type": "NullResult", "error_message": "Process returned None"}})
+                        results.append(
+                            {
+                                "instance_id": example.instance_id,
+                                "status": "error",
+                                "error_info": {
+                                    "error_type": "NullResult",
+                                    "error_message": "Process returned None",
+                                },
+                            }
+                        )
                     else:
                         results.append(result)
 
@@ -81,14 +101,24 @@ async def process_batch(examples: list[SweBenchExample], batch_size=10):
                     {
                         "instance_id": example.instance_id,
                         "status": "error",
-                        "error_info": {"error_type": type(e).__name__, "error_message": str(e), "traceback": traceback.format_exc(), "batch_failure": True},
+                        "error_info": {
+                            "error_type": type(e).__name__,
+                            "error_message": str(e),
+                            "traceback": traceback.format_exc(),
+                            "batch_failure": True,
+                        },
                     }
                 )
 
     return results
 
 
-async def run_eval(use_existing_preds: str | None, dataset: str, length: int, instance_id: str | None = None):
+async def run_eval(
+    use_existing_preds: str | None,
+    dataset: str,
+    length: int,
+    instance_id: str | None = None,
+):
     run_id = use_existing_preds or str(uuid.uuid4())
     predictions_dir = PREDS_DNAME / f"results_{run_id}"
     dataset = SWEBenchDataset(dataset)
@@ -155,10 +185,25 @@ async def run_eval(use_existing_preds: str | None, dataset: str, length: int, in
 
 
 @click.command()
-@click.option("--use-existing-preds", help="The run ID of the existing predictions to use.", type=str, default=None)
-@click.option("--dataset", help="The dataset to use.", type=click.Choice([dataset.value for dataset in SWEBenchDataset]), default=SWEBenchDataset.LITE.value)
+@click.option(
+    "--use-existing-preds",
+    help="The run ID of the existing predictions to use.",
+    type=str,
+    default=None,
+)
+@click.option(
+    "--dataset",
+    help="The dataset to use.",
+    type=click.Choice([dataset.value for dataset in SWEBenchDataset]),
+    default=SWEBenchDataset.LITE.value,
+)
 @click.option("--length", help="The number of examples to process.", type=int, default=10)
-@click.option("--instance-id", help="The instance ID of the example to process.", type=str, default=None)
+@click.option(
+    "--instance-id",
+    help="The instance ID of the example to process.",
+    type=str,
+    default=None,
+)
 def run_eval_command(use_existing_preds, dataset, length, instance_id):
+    """Command-line entry point: run the `run_eval` coroutine to completion with the parsed options."""
     asyncio.run(run_eval(use_existing_preds, dataset, length, instance_id))
 
diff --git a/codegen-examples/examples/swebench_agent_run/sandbox.py b/codegen-examples/examples/swebench_agent_run/sandbox.py
@@ -0,0 +1,74 @@
+from contextlib import asynccontextmanager
+
+import modal
+from codegen.extensions.swebench.utils import SweBenchExample
+
+from .snapshot_manager import (
+    ModalDictSnapshotManager,
+    SnapshotManager,
+)
+
+# Base image shared by all sandboxes: Debian slim with Python 3.13 and git installed.
+BASE_IMAGE: modal.Image = modal.Image.debian_slim(python_version="3.13").apt_install("git")
+
+try:
+    # To ensure secrets are consistent across runs, we look up existing secret
+    # NOTE(review): if `Secret.from_name` resolves lazily, NotFoundError would be
+    # raised later than this `try` and the fallback below would never run - confirm.
+    secret = modal.Secret.from_name("swebench-agent-run-secrets")
+except modal.exception.NotFoundError:
+    # Named secret not found: fall back to a secret built via `Secret.from_dotenv()`.
+    secret = modal.Secret.from_dotenv()
+
+# Look up the app by name at import time, creating it if it does not exist yet.
+app = modal.App.lookup(name="swebench-agent-run", create_if_missing=True)
+
+
+class SandboxManager:
+    """Create, reuse, snapshot and tear down Modal sandboxes for SWE-bench examples.
+
+    Snapshot ids are looked up and stored through the injected snapshot manager,
+    so a later run can restore a sandbox instead of rebuilding it.
+    """
+
+    # When True, `get_sandbox` leaves the sandbox untouched on exit
+    # (no stash, no snapshot, no terminate).
+    keep_alive: bool
+
+    def __init__(
+        self,
+        keep_alive: bool = False,
+        snapshot_manager: SnapshotManager | None = None,
+    ):
+        """Configure the manager.
+
+        Args:
+            keep_alive: If True, sandboxes are left running when `get_sandbox` exits.
+            snapshot_manager: Store for snapshot ids; a `ModalDictSnapshotManager`
+                with its default name is used when omitted.
+        """
+        self.keep_alive = keep_alive
+        self.snapshot_manager = snapshot_manager or ModalDictSnapshotManager()
+
+    async def create_sandbox(self, example: SweBenchExample) -> modal.Sandbox:
+        """Return a sandbox for `example`, restored from a saved snapshot when one exists.
+
+        Otherwise a new sandbox is created from `BASE_IMAGE` with the repository
+        cloned and checked out at `example.environment_setup_commit`, tagged with
+        the repo and commit so `get_sandbox` can find it again.
+        """
+        existing_snapshot_uid = await self.snapshot_manager.get_snapshot_uid(example)
+        if existing_snapshot_uid:
+            return await modal.Sandbox._experimental_from_snapshot(existing_snapshot_uid)
+
+        # TODO: test if this get local version works / add ability to install specific version
+        # NOTE(review): `example.repo` is used verbatim both as the clone source and as
+        # the directory to `cd` into - confirm it is clonable and matches the checkout dir.
+        with modal.enable_output():
+            return await modal.Sandbox.create(
+                app=app,
+                image=BASE_IMAGE.run_commands(f"cd /root; git clone {example.repo} && cd {example.repo} && git checkout {example.environment_setup_commit}"),
+                secrets=[secret],
+                tags={"repo": example.repo, "commit": example.environment_setup_commit},
+            )
+
+    @asynccontextmanager
+    async def get_sandbox(self, example: SweBenchExample):
+        """Yield a sandbox for `example`, cleaning it up on exit unless `keep_alive` is set.
+
+        The first sandbox listed for this app whose tags match the example's repo
+        and commit is reused; if none is listed, `create_sandbox` is called.
+
+        On exit (normal or via exception), when `keep_alive` is False, the working
+        tree is stashed, a snapshot is taken, its id is saved through the snapshot
+        manager, and the sandbox is terminated. Termination happens even if one of
+        the preceding cleanup steps raises.
+        """
+        async for sandbox in modal.Sandbox.list(
+            app_id=app.app_id,
+            tags={"repo": example.repo, "commit": example.environment_setup_commit},
+        ):
+            break
+        else:
+            sandbox = await self.create_sandbox(example)
+
+        try:
+            # NOTE(review): confirm what `wait()` blocks on here before the sandbox is yielded.
+            await sandbox.wait()
+            yield sandbox
+        finally:
+            if not self.keep_alive:
+                try:
+                    # Killing sandbox, so take a snapshot and save it
+                    await sandbox.exec(
+                        "bash",
+                        "-c",
+                        f"cd /root/{example.repo}; git stash",  # cheeky little stash
+                    )
+                    snapshot = await sandbox._experimental_snapshot()  # commit any codegen updates
+
+                    await self.snapshot_manager.save_snapshot_uid(example, snapshot.object_id)
+                finally:
+                    # Terminate in a nested `finally` so a failed stash/snapshot/save
+                    # does not leave the sandbox running.
+                    # Codebase.from_repo doesn't use git to fetch/checkout the repo.
+                    # We could replace this with our own git commands to control the file state
+                    await sandbox.terminate()
diff --git a/codegen-examples/examples/swebench_agent_run/snapshot_manager.py b/codegen-examples/examples/swebench_agent_run/snapshot_manager.py
@@ -0,0 +1,57 @@
+import io
+import json
+from collections import defaultdict
+
+import modal
+from codegen.extensions.swebench.utils import SweBenchExample
+
+
+class SnapshotManager:
+    """Interface for looking up and storing sandbox snapshot ids per SWE-bench example.
+
+    Subclasses override both methods; the versions here always raise.
+    """
+
+    async def get_snapshot_uid(self, example: SweBenchExample) -> str | None:
+        """Return the snapshot id saved for `example`, or None when none is stored.
+
+        Raises:
+            NotImplementedError: always, in this base class.
+        """
+        raise NotImplementedError("Not implemented")
+
+    async def save_snapshot_uid(self, example: SweBenchExample, snapshot_uid: str) -> None:
+        """Store `snapshot_uid` as the snapshot for `example`.
+
+        Raises:
+            NotImplementedError: always, in this base class.
+        """
+        raise NotImplementedError("Not implemented")
+
+
+class VolumeSnapshotManager(SnapshotManager):
+    """Snapshot manager that keeps a JSON map of repo -> commit -> snapshot id on a Modal Volume."""
+
+    def __init__(self, volume_name: str = "swebench-agent-snapshot-volume"):
+        """Bind to the named volume (created if missing) and fix the metadata file path.
+
+        Args:
+            volume_name: Name of the Modal Volume holding the metadata file.
+        """
+        self.snapshot_volume = modal.Volume.from_name(volume_name, create_if_missing=True)
+        self.snapshot_meta_file_path: str = "/root/snapshot_meta.json"
+
+    async def get_snapshot_uid(self, example: SweBenchExample) -> str | None:
+        """Return the snapshot id stored for the example's repo and environment setup commit.
+
+        Returns:
+            The stored id, or None when the repo or the commit has no entry.
+        """
+        snapshot_meta = await self.read_snapshot_meta()
+        # Read under the same key `save_snapshot_uid` writes (environment_setup_commit);
+        # looking up a different commit field would never find a saved snapshot.
+        return snapshot_meta.get(example.repo, {}).get(example.environment_setup_commit)
+
+    async def save_snapshot_uid(self, example: SweBenchExample, snapshot_uid: str) -> None:
+        """Record `snapshot_uid` for the example and rewrite the metadata file on the volume."""
+        snapshot_meta = await self.read_snapshot_meta()
+        snapshot_meta[example.repo][example.environment_setup_commit] = snapshot_uid
+        payload = io.BytesIO(json.dumps(snapshot_meta).encode("utf-8"))
+        # force=True: the metadata file is rewritten on every save, so the upload
+        # must be allowed to overwrite the existing file.
+        async with self.snapshot_volume.batch_upload(force=True) as upload:
+            await upload.put_file(payload, self.snapshot_meta_file_path)
+        await self.snapshot_volume.commit()
+
+    async def read_snapshot_meta(self) -> dict[str, dict[str, str]]:
+        """Load the metadata map from the volume.
+
+        Returns:
+            A nested defaultdict (repo -> commit -> snapshot id). Missing repos map
+            to an empty inner mapping and missing commits to None. It is empty when
+            the metadata file does not exist yet.
+        """
+        bytes_io = io.BytesIO()
+        try:
+            await self.snapshot_volume.read_file_into_fileobj(self.snapshot_meta_file_path, bytes_io)
+            loaded = json.loads(bytes_io.getvalue().decode("utf-8"))
+        except FileNotFoundError:
+            loaded = {}
+        # Wrap the inner dicts too: JSON yields plain dicts, which would raise
+        # KeyError for a known repo with an unknown commit.
+        snapshot_meta = defaultdict(lambda: defaultdict(lambda: None))
+        for repo, commits in loaded.items():
+            snapshot_meta[repo].update(commits)
+        return snapshot_meta
+
+
+class ModalDictSnapshotManager(SnapshotManager):
+    """Snapshot manager backed by a named `modal.Dict`, keyed by (repo, environment_setup_commit)."""
+
+    def __init__(self, name: str = "swebench-agent-snapshot-dict"):
+        """Bind to the dict called `name`, creating it if it does not exist."""
+        self.snapshot_dict = modal.Dict.from_name(name, create_if_missing=True)
+
+    async def get_snapshot_uid(self, example: SweBenchExample) -> str | None:
+        """Return the snapshot id stored for `example`, or None when the key is absent."""
+        key = (example.repo, example.environment_setup_commit)
+        try:
+            uid = self.snapshot_dict[key]
+        except KeyError:
+            uid = None
+        return uid
+
+    async def save_snapshot_uid(self, example: SweBenchExample, snapshot_uid: str) -> None:
+        """Store `snapshot_uid` under the example's (repo, environment_setup_commit) key."""
+        key = (example.repo, example.environment_setup_commit)
+        self.snapshot_dict[key] = snapshot_uid