
Commit 29f09cb

Add CI workflow and lightweight tests
1 parent: b6a7157

10 files changed (+144, -10 lines)


.github/workflows/ci.yml

Lines changed: 21 additions & 0 deletions (new file)

name: CI

on:
  push:
    branches: ["main"]
  pull_request:

jobs:
  tests:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.12"
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements-test.txt
      - name: Run tests
        run: pytest --maxfail=1 --disable-warnings

README.md

Lines changed: 20 additions & 4 deletions

@@ -5,8 +5,8 @@ This repository implements a Cursor-style tri-plane composed of an FP8 MoE train
 ## Architecture Overview
 
 - **Environment Fleet**: `envd/server.py` provides the gRPC tool surface (read/edit/search/lint/exec) and optional semantic search backed by Qdrant; Firecracker launch scripts in `scripts/firecracker/` create snapshot-based microVMs.
-- **Inference**: `inference/serve.py` bootstraps Ray actors (controller, samplers, env clients) to execute parallel tool plans with straggler mitigation and speculative rollouts.
-- **Trainer**: `trainer/` contains a PPO loop over a lightweight MoE transformer policy, reward shaping utilities, and data helpers suitable for integration with DeepSpeed/Megatron FP8 stacks.
+- **Inference**: `inference/serve.py` bootstraps Ray actors (controller, samplers, env clients) to execute parallel tool plans with straggler mitigation and speculative rollouts, with pluggable samplers (stub or OpenAI-compatible vLLM backend) and rollout persistence (JSONL/S3/ClickHouse).
+- **Trainer**: `trainer/` contains a PPO loop over a lightweight MoE transformer policy plus a DeepSpeed/TransformerEngine FP8 training stack for large-scale runs.
 
 ## Getting Started
 
@@ -31,6 +31,22 @@
 ```bash
 ./experiments/run_training.sh
 ```
+6. **DeepSpeed MoE trainer** (requires NVIDIA + TransformerEngine/DeepSpeed):
+```bash
+deepspeed --num_gpus=8 trainer/train_deepspeed.py --rollouts /data/rollouts.jsonl
+```
+
+## Testing & Continuous Integration
+
+1. Install lightweight test dependencies:
+```bash
+pip install -r requirements-test.txt
+```
+2. Run the pytest suite:
+```bash
+pytest
+```
+3. GitHub Actions workflow: `.github/workflows/ci.yml` executes the same test suite on pushes and pull requests.
 
 ## Firecracker Workflow
 
@@ -56,5 +72,5 @@
 ## Roadmap
 
 - Integrate semantic code search using a production-grade embedding model.
-- Replace the stub model sampler with a vLLM or serve-hosted policy endpoint.
-- Wire reward streaming to an external registry (e.g., ClickHouse + S3 checkpoint sync).
+- Replace the stub sampler with a production vLLM deployment and wire in live checkpoints.
+- Wire reward streaming to an external registry (e.g., ClickHouse + S3 checkpoint sync) for online PPO.

inference/__init__.py

Lines changed: 8 additions & 1 deletion

@@ -1,3 +1,10 @@
-from .serve import bootstrap_ray
+from __future__ import annotations
+
+
+def bootstrap_ray(*args, **kwargs):
+    from .serve import bootstrap_ray as _bootstrap_ray
+
+    return _bootstrap_ray(*args, **kwargs)
+
 
 __all__ = ["bootstrap_ray"]
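
The replacement above makes the package-level re-export lazy: importing `inference` no longer imports `inference.serve` (and therefore Ray) at import time, which is what lets the lightweight test environment avoid heavy dependencies. A minimal sketch of the intended behaviour, assuming Ray is only imported inside `inference.serve`:

```python
# Sketch only: illustrates the lazy re-export pattern in inference/__init__.py above.
# Assumption: inference.serve is the only module in the package that imports Ray.
import sys

import inference  # cheap: does not import inference.serve or Ray yet

assert "inference.serve" not in sys.modules

# The heavy import happens on first call:
# inference.bootstrap_ray(...)  # -> loads inference.serve (and Ray) lazily
```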

requirements-test.txt

Lines changed: 3 additions & 0 deletions (new file)

pytest>=8.0.0
grpcio>=1.60.0
httpx>=0.27.0

tests/conftest.py

Lines changed: 6 additions & 0 deletions (new file)

import sys
from pathlib import Path

ROOT = Path(__file__).resolve().parents[1]
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))
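
This shim prepends the repository root to `sys.path`, so the tests can import the project packages straight from the checkout without an editable install. A minimal sketch of what any test module can then do, using only imports the new tests themselves perform:

```python
# Sketch: with the sys.path shim above, project packages resolve directly
# from the repository checkout instead of an installed distribution.
from inference.config import SamplerConfig, StorageConfig
from inference.storage import JSONLWriter
```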

tests/test_inference_model.py

Lines changed: 18 additions & 0 deletions (new file)

import asyncio
import json

from inference.config import SamplerConfig
from inference.model import create_sampler, ensure_json_plan


def test_stub_sampler_emits_plan():
    sampler = create_sampler(SamplerConfig(kind="stub"))
    plan_json = asyncio.run(sampler.sample("Investigate flake"))
    plan = json.loads(plan_json)
    assert "parallel" in plan or "then" in plan


def test_ensure_json_plan_handles_invalid_json():
    repaired = asyncio.run(ensure_json_plan("not-json"))
    plan = json.loads(repaired)
    assert "then" in plan
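
These two tests pin down the sampler contract: `create_sampler` builds a sampler from a `SamplerConfig`, `sample()` is a coroutine returning a JSON plan string, and `ensure_json_plan` repairs output that is not valid JSON. A hedged usage sketch assembled only from the calls exercised above (the pass-through behaviour of `ensure_json_plan` on already-valid JSON is an assumption):

```python
import asyncio
import json

from inference.config import SamplerConfig
from inference.model import create_sampler, ensure_json_plan


async def plan_for(prompt: str) -> dict:
    # "stub" keeps everything local; no model backend is required.
    sampler = create_sampler(SamplerConfig(kind="stub"))
    raw = await sampler.sample(prompt)
    # Assumption: ensure_json_plan leaves valid plans alone and repairs invalid
    # ones into a minimal {"then": ...} plan, as the second test implies.
    repaired = await ensure_json_plan(raw)
    return json.loads(repaired)


if __name__ == "__main__":
    print(asyncio.run(plan_for("Investigate flake")))
```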

tests/test_moe_deepspeed.py

Lines changed: 14 additions & 0 deletions (new file)

import pytest

pytest.importorskip("torch")

from trainer.moe_deepspeed import HAS_DEEPSPEED, build_moe


def test_build_moe_guard():
    if not HAS_DEEPSPEED:
        with pytest.raises(RuntimeError):
            build_moe(64, 128, 2, 1.25)
    else:  # pragma: no cover - exercised only when deepspeed is available
        layer = build_moe(64, 128, 2, 1.25)
        assert layer.num_experts == 2

tests/test_storage.py

Lines changed: 18 additions & 0 deletions (new file)

import asyncio
import json

from inference.config import StorageConfig
from inference.storage import JSONLWriter, create_storage


def test_jsonl_writer_persists_records(tmp_path):
    target = tmp_path / "rollouts.jsonl"
    writer = JSONLWriter(str(target))
    asyncio.run(writer.write([{"prompt": "fix bug", "latency": 1.2}]))
    content = target.read_text().strip().splitlines()
    assert json.loads(content[0])["prompt"] == "fix bug"


def test_create_storage_noop_when_missing_bucket():
    writer = create_storage(StorageConfig(kind="s3", s3_bucket=None))
    assert writer.__class__.__name__ == "NoOpWriter"
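
The storage tests document the rollout persistence API: `JSONLWriter` appends JSON records asynchronously to a local file, and `create_storage` falls back to a no-op writer when the S3 backend is selected without a bucket. A short sketch along the same lines (the `write()` method on the fallback writer is assumed to share the `JSONLWriter` signature):

```python
import asyncio

from inference.config import StorageConfig
from inference.storage import JSONLWriter, create_storage


async def persist_demo() -> None:
    # Local JSONL persistence, mirroring test_jsonl_writer_persists_records.
    writer = JSONLWriter("rollouts.jsonl")
    await writer.write([{"prompt": "fix bug", "latency": 1.2}])

    # Misconfigured S3 storage degrades to a NoOpWriter rather than crashing,
    # so the rollout loop keeps running without a bucket (per the second test).
    fallback = create_storage(StorageConfig(kind="s3", s3_bucket=None))
    await fallback.write([{"prompt": "dropped"}])  # assumed to be a silent no-op


asyncio.run(persist_demo())
```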

trainer/__init__.py

Lines changed: 9 additions & 1 deletion

@@ -1,4 +1,12 @@
+from __future__ import annotations
+
 from .config import TrainConfig
-from .train import main
+
+
+def main():
+    from . import train as _train
+
+    _train.main()
+
 
 __all__ = ["TrainConfig", "main"]

trainer/moe_deepspeed.py

Lines changed: 27 additions & 4 deletions

@@ -1,12 +1,27 @@
 from __future__ import annotations
 
+from contextlib import nullcontext
 from typing import Dict, Tuple
 
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
-from deepspeed.moe.layer import MoE
-from transformer_engine.pytorch import fp8_autocast
+
+try:
+    from deepspeed.moe.layer import MoE
+
+    HAS_DEEPSPEED = True
+except ImportError:  # pragma: no cover - optional dependency
+    MoE = None  # type: ignore[assignment]
+    HAS_DEEPSPEED = False
+
+try:
+    from transformer_engine.pytorch import fp8_autocast as _fp8_autocast
+
+    HAS_TRANSFORMER_ENGINE = True
+except ImportError:  # pragma: no cover - optional dependency
+    _fp8_autocast = None
+    HAS_TRANSFORMER_ENGINE = False
 
 from .config import DeepSpeedMoEConfig
 
@@ -21,7 +36,15 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         return self.fc2(F.gelu(self.fc1(x)))
 
 
+def _fp8_context(enabled: bool):
+    if enabled and HAS_TRANSFORMER_ENGINE and _fp8_autocast is not None:
+        return _fp8_autocast(enabled=enabled)
+    return nullcontext()
+
+
 def build_moe(hidden_size: int, ffn_hidden: int, num_experts: int, capacity_factor: float) -> MoE:
+    if not HAS_DEEPSPEED:
+        raise RuntimeError("DeepSpeed MoE is not available; install deepspeed to enable this module")
     return MoE(
         hidden_size=hidden_size,
         expert=ExpertMLP,
@@ -46,7 +69,7 @@ def __init__(self, cfg: DeepSpeedMoEConfig):
         self.fp8 = cfg.fp8
 
     def forward(self, x: torch.Tensor) -> torch.Tensor:
-        with fp8_autocast(enabled=self.fp8):
+        with _fp8_context(self.fp8):
             attn_out, _ = self.attn(x, x, x, need_weights=False)
             x = self.ln1(x + self.dropout(attn_out))
             bsz, seqlen, hidden = x.shape
@@ -110,4 +133,4 @@ def build_engine_config(cfg: DeepSpeedMoEConfig, optimizer: Dict[str, float]) ->
     return base
 
 
-__all__ = ["DeepSpeedMoETransformer", "PPOPolicyValue", "build_engine_config"]
+__all__ = ["DeepSpeedMoETransformer", "PPOPolicyValue", "build_engine_config", "HAS_DEEPSPEED"]
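
Net effect of the guards above: `trainer.moe_deepspeed` now imports without DeepSpeed or TransformerEngine installed, `build_moe` defers the failure to call time with a clear `RuntimeError`, and FP8 autocast quietly degrades to a `nullcontext`. A minimal sketch of how a caller can branch on the exported flag (assumes only that torch is available, as the test above does):

```python
# Sketch: consuming the optional-dependency flag exported by trainer/moe_deepspeed.py.
from trainer.moe_deepspeed import HAS_DEEPSPEED, build_moe

if HAS_DEEPSPEED:
    # Same positional arguments as tests/test_moe_deepspeed.py:
    # hidden_size=64, ffn_hidden=128, num_experts=2, capacity_factor=1.25.
    layer = build_moe(64, 128, 2, 1.25)
    print(f"MoE layer built with {layer.num_experts} experts")
else:
    # Without deepspeed, the failure surfaces at call time, not import time.
    try:
        build_moe(64, 128, 2, 1.25)
    except RuntimeError as exc:
        print(f"MoE unavailable: {exc}")
```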
