
Commit 407cdb0

Expand CI-safe test coverage
1 parent 29f09cb commit 407cdb0

6 files changed: 133 additions & 10 deletions

inference/serve.py

Lines changed: 6 additions & 8 deletions
@@ -5,7 +5,7 @@
 import random
 import time
 from dataclasses import asdict, dataclass
-from typing import Any, Dict, Iterable, List
+from typing import Any, Dict, Iterable, List, Tuple
 
 import ray
 import grpc
@@ -14,7 +14,7 @@
 from envd.generated import tools_pb2, tools_pb2_grpc
 from .config import SamplerConfig, StorageConfig
 from .model import create_sampler, ensure_json_plan
-from .storage import create_storage
+from .storage import create_storage, write_rollout_records
 
 
 @dataclass(slots=True)
@@ -148,7 +148,7 @@ async def batch_rollouts(self, prompts: List[str], *, replicas: int = 3, timeout
             prompt_map[ref] = prompt
 
         deadline = time.perf_counter() + timeout_s
-        results: List[Dict[str, Any]] = []
+        pairs: List[Tuple[str, Dict[str, Any]]] = []
         pending = list(object_refs)
 
         while pending and time.perf_counter() < deadline:
@@ -160,16 +160,14 @@ async def batch_rollouts(self, prompts: List[str], *, replicas: int = 3, timeout
                 result = await ref
             except Exception as exc:  # noqa: BLE001
                 result = {"error": str(exc)}
-            record = {"prompt": prompt, "result": result, "timestamp_s": time.time()}
-            results.append(record)
+            pairs.append((prompt, result))
             prompt_map.pop(ref, None)
 
         for ref in pending:
            ray.cancel(ref, force=True)
 
-        if results:
-            await self._storage.write(results)
-        return results
+        records = await write_rollout_records(self._storage, pairs)
+        return records
 
 
 def bootstrap_ray(env_hosts: List[str], *, num_samplers: int = 2):
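With this refactor the records returned by batch_rollouts keep the same shape as before ("prompt", "result", "timestamp_s"); only their construction and persistence move into the storage helpers below. A small illustrative consumer of those records (not part of the commit; summarize is a hypothetical helper) might look like:

# Hypothetical consumer of the records returned by batch_rollouts; the field
# names mirror build_rollout_records in inference/storage.py below.
def summarize(records):
    errors = [r for r in records if "error" in r["result"]]
    return {
        "total": len(records),
        "errors": len(errors),
        "latest_ts": max((r["timestamp_s"] for r in records), default=None),
    }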

inference/storage.py

Lines changed: 22 additions & 2 deletions
@@ -4,7 +4,8 @@
 import json
 import uuid
 from pathlib import Path
-from typing import Any, Dict, Iterable
+import time
+from typing import Any, Dict, Iterable, List, Tuple, Callable
 
 from .config import StorageConfig
 
@@ -60,6 +61,25 @@ async def write(self, records: Iterable[Dict[str, Any]]) -> None:
         return None
 
 
+def build_rollout_records(entries: Iterable[Tuple[str, Dict[str, Any]]], now: Callable[[], float] | None = None) -> List[Dict[str, Any]]:
+    timestamp = now or time.time
+    return [
+        {
+            "prompt": prompt,
+            "result": result,
+            "timestamp_s": timestamp(),
+        }
+        for prompt, result in entries
+    ]
+
+
+async def write_rollout_records(storage: StorageWriter, entries: Iterable[Tuple[str, Dict[str, Any]]], now: Callable[[], float] | None = None) -> List[Dict[str, Any]]:
+    records = build_rollout_records(entries, now=now)
+    if records:
+        await storage.write(records)
+    return records
+
+
 def create_storage(cfg: StorageConfig) -> StorageWriter:
     try:
         if cfg.kind == "s3" and cfg.s3_bucket:
@@ -73,4 +93,4 @@ def create_storage(cfg: StorageConfig) -> StorageWriter:
     return NoOpWriter()
 
 
-__all__ = ["create_storage", "StorageWriter"]
+__all__ = ["create_storage", "StorageWriter", "build_rollout_records", "write_rollout_records"]
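The new helpers take an injectable now callable, which is what makes them exercisable in CI without real clocks or storage backends. A minimal sketch of driving them directly (not part of the commit; ListWriter is a hypothetical stand-in for any StorageWriter):

# Sketch only: exercise the new helpers with an in-memory writer and a fixed
# clock. ListWriter just satisfies the async write() interface.
import asyncio

from inference.storage import build_rollout_records, write_rollout_records

class ListWriter:
    def __init__(self):
        self.rows = []

    async def write(self, records):
        self.rows.extend(records)

writer = ListWriter()
entries = [("hello", {"ok": True})]
records = asyncio.run(write_rollout_records(writer, entries, now=lambda: 0.0))
assert records == build_rollout_records(entries, now=lambda: 0.0)
assert writer.rows == records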

tests/test_controller_bridge.py

Lines changed: 25 additions & 0 deletions
@@ -0,0 +1,25 @@
+import asyncio
+
+from inference.storage import build_rollout_records, write_rollout_records
+
+
+class InMemoryStorage:
+    def __init__(self):
+        self.records = None
+
+    async def write(self, records):
+        self.records = list(records)
+
+
+def test_build_rollout_records_structure():
+    records = build_rollout_records([("prompt-1", {"ok": True})], now=lambda: 123.0)
+    assert records[0]["prompt"] == "prompt-1"
+    assert records[0]["result"] == {"ok": True}
+    assert records[0]["timestamp_s"] == 123.0
+
+
+def test_write_rollout_records_persists():
+    storage = InMemoryStorage()
+    records = asyncio.run(write_rollout_records(storage, [("prompt-2", {"latency": 1.0})], now=lambda: 456.0))
+    assert storage.records == records
+    assert records[0]["timestamp_s"] == 456.0

tests/test_reward.py

Lines changed: 23 additions & 0 deletions
@@ -0,0 +1,23 @@
+from trainer.reward import compute_reward
+
+
+def test_compute_reward_balances_signals():
+    metrics = {
+        "tests_passed": 1,
+        "lint_improvement": 0.5,
+        "parallel_groups": 3,
+        "regressions": 0,
+    }
+    reward = compute_reward(metrics, latency_s=10.0)
+    assert reward > 0
+
+
+def test_compute_reward_penalizes_regressions():
+    metrics = {
+        "tests_passed": 0,
+        "lint_improvement": 0,
+        "parallel_groups": 0,
+        "regressions": 2,
+    }
+    reward = compute_reward(metrics, latency_s=5.0)
+    assert reward < 0
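These tests only pin the sign of the reward; trainer/reward.py itself is not touched by this commit, so the weights below are purely illustrative. A minimal compute_reward consistent with both assertions could look like:

# Hypothetical sketch of a reward consistent with the tests above; the real
# trainer/reward.py is not shown in this commit, so the weights are invented.
def compute_reward(metrics, *, latency_s):
    reward = (
        2.0 * metrics.get("tests_passed", 0)
        + 1.0 * metrics.get("lint_improvement", 0)
        + 0.5 * metrics.get("parallel_groups", 0)
        - 3.0 * metrics.get("regressions", 0)
    )
    # Mild latency penalty so faster rollouts score slightly higher.
    return reward - 0.01 * latency_s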

tests/test_scripts.py

Lines changed: 17 additions & 0 deletions
@@ -0,0 +1,17 @@
+import subprocess
+from pathlib import Path
+
+import pytest
+
+SCRIPTS = [
+    Path("scripts/firecracker/build_base.sh"),
+    Path("scripts/firecracker/create_template.sh"),
+    Path("scripts/firecracker/launch_envs.sh"),
+]
+
+
+@pytest.mark.parametrize("script_path", SCRIPTS)
+def test_firecracker_scripts_shellcheck(script_path):
+    full_path = Path(__file__).resolve().parents[1] / script_path
+    result = subprocess.run(["bash", "-n", str(full_path)], capture_output=True, text=True)
+    assert result.returncode == 0, result.stderr
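Despite the test name, bash -n only parses the scripts; it does not run shellcheck, which keeps the check CI-safe on runners without the linter installed. A hedged companion test (not in this commit) that runs the real linter only when it is available could sit alongside it:

# Hypothetical companion test, not part of this commit: run shellcheck when
# installed, otherwise skip. Reuses the SCRIPTS list defined above in
# tests/test_scripts.py.
import shutil
import subprocess
from pathlib import Path

import pytest

@pytest.mark.skipif(shutil.which("shellcheck") is None, reason="shellcheck not installed")
@pytest.mark.parametrize("script_path", SCRIPTS)
def test_firecracker_scripts_shellcheck_lint(script_path):
    full_path = Path(__file__).resolve().parents[1] / script_path
    result = subprocess.run(["shellcheck", str(full_path)], capture_output=True, text=True)
    assert result.returncode == 0, result.stdout + result.stderr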

tests/test_vllm_sampler.py

Lines changed: 40 additions & 0 deletions
@@ -0,0 +1,40 @@
+import asyncio
+import json
+
+import httpx
+import pytest
+
+from inference.config import SamplerConfig
+from inference.model import create_sampler
+
+
+class DummyResponse:
+    def __init__(self, payload):
+        self._payload = payload
+
+    def raise_for_status(self):
+        return None
+
+    def json(self):
+        return self._payload
+
+
+@pytest.mark.asyncio
+async def test_vllm_sampler_formats_plan(monkeypatch):
+    cfg = SamplerConfig(kind="vllm-openai", vllm_rpc_host="localhost")
+    sampler = create_sampler(cfg)
+
+    async def fake_post(self, url, json):
+        assert "Fix bug" in json["prompt"]
+        return DummyResponse({"choices": [{"text": "{\"then\": []}"}]})
+
+    async def fake_close(self):
+        return None
+
+    monkeypatch.setattr(httpx.AsyncClient, "post", fake_post, raising=False)
+    monkeypatch.setattr(httpx.AsyncClient, "aclose", fake_close, raising=False)
+
+    plan = await sampler.sample("Fix bug")
+    assert json.loads(plan)["then"] == []
+
+    await sampler.close()
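The @pytest.mark.asyncio marker assumes the pytest-asyncio plugin is installed; without it the coroutine test is collected but never awaited. Patching httpx.AsyncClient at the class level works regardless of how the sampler builds its client; if the sampler can instead accept a preconfigured client, httpx's built-in MockTransport is an alternative worth knowing (illustrative sketch, not part of the commit):

# Hypothetical alternative to class-level monkeypatching: httpx.MockTransport
# fakes responses per request. Only applicable if create_sampler (or the
# sampler class) can be handed a preconfigured httpx.AsyncClient.
import httpx

def fake_vllm(request: httpx.Request) -> httpx.Response:
    # Return a canned vLLM-style completion for any prompt.
    return httpx.Response(200, json={"choices": [{"text": "{\"then\": []}"}]})

mock_client = httpx.AsyncClient(transport=httpx.MockTransport(fake_vllm))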
