|
| 1 | +import { execFile } from "node:child_process"; |
| 2 | +import fs from "node:fs/promises"; |
| 3 | +import os from "node:os"; |
| 4 | +import path from "node:path"; |
| 5 | +import { fileURLToPath } from "node:url"; |
| 6 | +import { promisify } from "node:util"; |
| 7 | +import { afterEach, describe, expect, it } from "vitest"; |
| 8 | + |
/** Promise-returning form of `execFile`, used to run the Python script. */
const execFileAsync = promisify(execFile);

/** Temporary workspace directories created by the tests in this file. */
const tempDirs: string[] = [];

// The repository root is resolved two levels above the directory holding this file.
const testFileDir = path.dirname(fileURLToPath(import.meta.url));
const repoRoot = path.resolve(testFileDir, "..", "..");

/** Absolute path of `scripts/innovation_loop.py` inside the repository. */
const innovationLoopScript = path.join(repoRoot, "scripts", "innovation_loop.py");
| 13 | + |
/**
 * Creates a throwaway workspace in a fresh temporary directory.
 *
 * The workspace receives a copy of the `fixtures/kb/vault` directory, small
 * source and data files, two YAML config files, and a fake `opencode`
 * executable that prints a non-JSON line when called with `--agent Apollo`
 * and `{"ok": true}` for anything else. The workspace path is appended to
 * `tempDirs`.
 *
 * @returns The workspace root, the path of `configs/goal.yaml`, and the
 *   directory that contains the fake `opencode` executable.
 */
async function makeWorkspace(): Promise<{ workspace: string; configPath: string; fakeBin: string }> {
  const workspace = await fs.mkdtemp(path.join(os.tmpdir(), "auto-exp-specialist-fail-"));
  tempDirs.push(workspace);
  const inWorkspace = (...segments: string[]): string => path.join(workspace, ...segments);

  // Directory skeleton, created in a fixed order.
  for (const dirName of ["configs", "src", "data"]) {
    await fs.mkdir(inWorkspace(dirName), { recursive: true });
  }
  await fs.cp(path.join(repoRoot, "fixtures", "kb", "vault"), inWorkspace("vault"), { recursive: true });
  const fakeBin = inWorkspace("fake-bin");
  await fs.mkdir(fakeBin, { recursive: true });

  const researchBrainYaml =
    [
      `vault_root: ${inWorkspace("vault")}`,
      "index_output_dir: experiments/research/index",
      "retrieval_cache_dir: experiments/research/retrieval-cache",
      "evidence_output_dir: experiments/research",
      "feedback_output: experiments/research/paper-feedback.jsonl",
      "posterior_rank_output: experiments/research/posterior-rank.json",
      "paper_id_map_output: experiments/research/paper-id-map.jsonl",
      "frontier_map_output: experiments/research/index/frontier-map.json",
    ].join("\n") + "\n";

  // Seed files as [path, contents] pairs, written one after another in this order.
  const seedFiles: ReadonlyArray<readonly [string, string]> = [
    [inWorkspace("src", "config.json"), JSON.stringify({ objective_mode: "baseline" }, null, 2) + "\n"],
    [inWorkspace("src", "strategy.txt"), "baseline\n"],
    [inWorkspace("src", "module.ts"), "export const variant = 0;\n"],
    [inWorkspace("data", "observations.csv"), "split,value\ntrain,1\n"],
    [inWorkspace("evaluate.py"), "print(0.8)\n"],
    [inWorkspace("configs", "research_brain.yaml"), researchBrainYaml],
  ];
  for (const [filePath, contents] of seedFiles) {
    await fs.writeFile(filePath, contents, "utf8");
  }

  // Fake `opencode`: the response depends only on the value following `--agent`.
  const opencodeSource =
    [
      "#!/usr/bin/env python3",
      "import sys",
      "args = sys.argv[1:]",
      'agent = args[args.index("--agent") + 1] if "--agent" in args else None',
      'if agent == "Apollo":',
      ' print("not-json-response")',
      "else:",
      " print('{\"ok\": true}')",
    ].join("\n") + "\n";
  const opencodePath = path.join(fakeBin, "opencode");
  await fs.writeFile(opencodePath, opencodeSource, "utf8");
  await fs.chmod(opencodePath, 0o755);

  const configPath = inWorkspace("configs", "goal.yaml");
  const goalYaml = ['goal_text: "test"', 'target_metric: "score"', 'metric_direction: "maximize"'].join("\n") + "\n";
  await fs.writeFile(configPath, goalYaml, "utf8");

  return { workspace, configPath, fakeBin };
}
| 42 | + |
// After each test, empty `tempDirs` and delete every directory it listed.
afterEach(async () => {
  // `splice(0)` drains the shared list so a later test starts with an empty one.
  const dirsToRemove = tempDirs.splice(0);
  const removals = dirsToRemove.map((dir) =>
    fs.rm(dir, { recursive: true, force: true, maxRetries: 10, retryDelay: 200 }),
  );
  await Promise.all(removals);
});
| 46 | + |
describe("specialist schema failure", () => {
  /**
   * Runs `bootstrap` and then two `tick` commands of the innovation loop
   * script in live mode, with a fake `opencode` first on PATH. The first tick
   * must succeed, the second must reject, and at least one entry must then
   * exist under `experiments/live-specialist-failures` in the workspace.
   */
  it("fails loudly when a live specialist returns invalid JSON", async () => {
    const { workspace, configPath, fakeBin } = await makeWorkspace();

    // Put the fake binary first on PATH. Use the platform's delimiter, and
    // append the inherited PATH only when it is non-empty, so that no empty
    // entry (which POSIX path lookup treats as the current directory) is added.
    const inheritedPath = process.env.PATH;
    const searchPath = inheritedPath ? `${fakeBin}${path.delimiter}${inheritedPath}` : fakeBin;
    const env = {
      ...process.env,
      PATH: searchPath,
      INNOVATION_LOOP_OPENCODE_DIR: repoRoot,
      INNOVATION_LOOP_AGENT_MODEL: "kimi-for-coding/kimi-k2.5",
      INNOVATION_LOOP_DISABLE_REAL_DVC: "1",
    };

    // All three invocations share the same flags; only the subcommand differs.
    const runLoop = (command: "bootstrap" | "tick") =>
      execFileAsync(
        "python3",
        [innovationLoopScript, command, "--config", configPath, "--workspace", workspace, "--mode", "live"],
        { cwd: workspace, env },
      );

    await runLoop("bootstrap");
    // The first tick is awaited directly, so a failure here fails the test.
    await runLoop("tick");
    // The second tick is the one expected to exit with an error.
    await expect(runLoop("tick")).rejects.toThrow();

    const failureDir = path.join(workspace, "experiments", "live-specialist-failures");
    const files = await fs.readdir(failureDir);
    expect(files.length).toBeGreaterThan(0);
  }, 15000);
});
0 commit comments