README.md (+20 −4: 20 additions & 4 deletions)
@@ -5,8 +5,8 @@ This repository implements a Cursor-style tri-plane composed of an FP8 MoE train
 ## Architecture Overview
 
 - **Environment Fleet**: `envd/server.py` provides the gRPC tool surface (read/edit/search/lint/exec) and optional semantic search backed by Qdrant; Firecracker launch scripts in `scripts/firecracker/` create snapshot-based microVMs.
-- **Inference**: `inference/serve.py` bootstraps Ray actors (controller, samplers, env clients) to execute parallel tool plans with straggler mitigation and speculative rollouts.
-- **Trainer**: `trainer/` contains a PPO loop over a lightweight MoE transformer policy, reward shaping utilities, and data helpers suitable for integration with DeepSpeed/Megatron FP8 stacks.
+- **Inference**: `inference/serve.py` bootstraps Ray actors (controller, samplers, env clients) to execute parallel tool plans with straggler mitigation and speculative rollouts; samplers are pluggable (stub or an OpenAI-compatible vLLM backend) and rollouts persist to JSONL, S3, or ClickHouse.
+- **Trainer**: `trainer/` contains a PPO loop over a lightweight MoE transformer policy plus a DeepSpeed/TransformerEngine FP8 training stack for large-scale runs.
 
 ## Getting Started
@@ -31,6 +31,22 @@ This repository implements a Cursor-style tri-plane composed of an FP8 MoE train
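The straggler mitigation and speculative rollouts mentioned in the Inference bullet can be sketched with the standard library alone (the repository uses Ray actors; `run_tool`, its `delay` parameter, and the replica count here are hypothetical illustrations of the pattern, not the project's API):

```python
import concurrent.futures
import time


def run_tool(call_id: str, delay: float) -> str:
    """Stand-in for an env-client tool call; `delay` models a straggler."""
    time.sleep(delay)
    return f"result:{call_id}"


def speculative_call(pool, call_id: str, delays, k: int = 2) -> str:
    """Launch k redundant replicas of the same tool call, keep the first
    result to arrive, and cancel the stragglers (best-effort)."""
    futures = [pool.submit(run_tool, call_id, delays[i]) for i in range(k)]
    done, pending = concurrent.futures.wait(
        futures, return_when=concurrent.futures.FIRST_COMPLETED
    )
    for f in pending:
        f.cancel()  # threads already running will still finish quietly
    return next(iter(done)).result()


with concurrent.futures.ThreadPoolExecutor(max_workers=4) as pool:
    # The second replica is much faster, so its result wins the race.
    out = speculative_call(pool, "lint", delays=[0.5, 0.01])
print(out)  # result:lint
```

Both replicas compute the same answer, so racing them trades extra compute for latency, which is the usual speculative-execution bargain.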
0 commit comments
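A minimal sketch of the JSONL flavour of rollout persistence named in the diff (field names such as `prompt`, `actions`, and `reward` are illustrative, not the repository's actual schema; the S3 and ClickHouse sinks would share the same append/load interface):

```python
import json
import tempfile
from pathlib import Path


def append_rollout(path: Path, rollout: dict) -> None:
    """Append one rollout as a single JSON line (the JSONL sink)."""
    with path.open("a", encoding="utf-8") as f:
        f.write(json.dumps(rollout, sort_keys=True) + "\n")


def load_rollouts(path: Path) -> list[dict]:
    """Read every persisted rollout back, e.g. for a training epoch."""
    with path.open(encoding="utf-8") as f:
        return [json.loads(line) for line in f if line.strip()]


log = Path(tempfile.mkdtemp()) / "rollouts.jsonl"
append_rollout(log, {"prompt": "fix lint error", "actions": ["read", "edit"], "reward": 1.0})
append_rollout(log, {"prompt": "add a test", "actions": ["search"], "reward": 0.0})

batch = load_rollouts(log)
print(len(batch), batch[0]["reward"])  # 2 1.0
```

Append-only JSON lines keep writes atomic per rollout and make the log trivially streamable into bulk stores later.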