Skip to content

Commit b6ad7a4

Browse files
test: align public repo with latest controller and brain flows
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)
Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
1 parent 28e711f commit b6ad7a4

9 files changed

+41
-21
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,5 +22,6 @@ experiments/research/paper-id-map.jsonl
2222
dvclive/
2323
.DS_Store
2424
vault/
25+
vault
2526
*.log
2627
*.pyc

tests/analysis/schema.test.ts

Lines changed: 16 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,25 @@
11
import { describe, expect, it } from "vitest";
22
import resultPacket from "../../fixtures/results/result-packet-good.json";
3-
import { proposalCardSchema } from "../../src/analysis/proposal-card";
4-
import { resultPacketSchema } from "../../src/analysis/result-packet";
3+
import { proposalCardSchema, proposalContractSchema } from "../../src/analysis/proposal-card";
4+
import { controllerSessionSchema, resultPacketSchema } from "../../src/analysis/result-packet";
55

66
describe("analysis schemas", () => {
77
it("accepts a valid result packet and rejects invalid proposal cards", () => {
88
expect(() => resultPacketSchema.parse(resultPacket)).not.toThrow();
9+
expect(() =>
10+
proposalContractSchema.parse({
11+
family: "objective.loss",
12+
mechanism: "对目标函数做正则化,预期先改善中间稳定性指标,再影响目标指标。",
13+
redirect_if_underperforming: "切换到表征路线",
14+
}),
15+
).not.toThrow();
16+
expect(() =>
17+
controllerSessionSchema.parse({
18+
session_id: "s1",
19+
stage: "ready_to_execute",
20+
direction_memory_v2: { "objective.loss|generic-underperform": { "repr.feature": { weight: 1.0, confidence: 0.5 } } },
21+
}),
22+
).not.toThrow();
923
expect(() =>
1024
proposalCardSchema.parse({
1125
proposal_id: "bad",

tests/e2e/e2e.test.ts

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -15,11 +15,12 @@ async function makeWorkspace(): Promise<string> {
1515
await fs.writeFile(path.join(dir, "src", "config.json"), '{"learning_rate":0.1}\n', "utf8");
1616
await fs.writeFile(path.join(dir, "src", "strategy.txt"), "baseline\n", "utf8");
1717
await fs.writeFile(path.join(dir, "src", "module.ts"), "export const variant = 0;\n", "utf8");
18+
await fs.writeFile(path.join(dir, "evaluate.py"), "print(0.93)\n", "utf8");
1819
return dir;
1920
}
2021

2122
afterEach(async () => {
22-
await Promise.all(tempDirs.splice(0).map((dir) => fs.rm(dir, { recursive: true, force: true })));
23+
await Promise.all(tempDirs.splice(0).map((dir) => fs.rm(dir, { recursive: true, force: true, maxRetries: 10, retryDelay: 200 })));
2324
});
2425

2526
describe("local e2e", () => {
@@ -71,5 +72,5 @@ describe("local e2e", () => {
7172
]),
7273
);
7374
expect(status.best.current_best.metric).toBeGreaterThan(0.5);
74-
});
75+
}, 30000);
7576
});

tests/e2e/python-controller-real-dvc.test.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@ const innovationLoopScript = path.join(repoRoot, "scripts", "innovation_loop.py"
1313
const hasRealDvc = spawnSync("python3", ["-c", "import shutil,sys; sys.exit(0 if shutil.which('dvc') else 1)"], {
1414
cwd: repoRoot,
1515
}).status === 0;
16-
const describeIfRealDvc = hasRealDvc && process.env.RUN_REAL_DVC_TESTS === "1" ? describe : describe.skip;
16+
const describeIfRealDvc = hasRealDvc ? describe : describe.skip;
1717

1818
async function makeWorkspace(): Promise<{ workspace: string; configPath: string }> {
1919
const workspace = await fs.mkdtemp(path.join(os.tmpdir(), "auto-exp-python-controller-real-dvc-"));

tests/e2e/research-brain-direction-memory.test.ts

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -58,11 +58,12 @@ describe("research brain direction memory", () => {
5858
},
5959
};
6060
session.direction_memory_v2 = {
61-
"objective.loss|generic-underperform": {
61+
"objective.loss|generic-underperform|loss_shape->optimization_stability->target_metric": {
6262
"repr.feature": {
6363
weight: 1.5,
6464
last_round: 1,
6565
reason: "停止重复 objective.loss,转向 repr.feature",
66+
metric_path_signature: "loss_shape->optimization_stability->target_metric",
6667
success_count: 2,
6768
failure_count: 1,
6869
crash_count: 0,
@@ -72,6 +73,7 @@ describe("research brain direction memory", () => {
7273
weight: 1.8,
7374
last_round: 1,
7475
reason: "停止重复 objective.loss,转向 arch.backbone",
76+
metric_path_signature: "loss_shape->optimization_stability->target_metric",
7577
success_count: 0,
7678
failure_count: 3,
7779
crash_count: 1,
@@ -84,5 +86,5 @@ describe("research brain direction memory", () => {
8486
await execFileAsync("python3", [innovationLoopScript, "tick", "--config", configPath, "--workspace", workspace, "--mode", "mock"], { cwd: workspace });
8587
const proposals = JSON.parse(await fs.readFile(path.join(workspace, "experiments", "proposals", "round-0001.json"), "utf8"));
8688
expect(proposals.next_primary_hypothesis.family).toBe("repr.feature");
87-
});
89+
}, 15000);
8890
});

tests/e2e/research-brain-loop-integration.test.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -67,5 +67,5 @@ describe("research brain loop integration", () => {
6767
expect(proposals.next_primary_hypothesis.causal_metric_path).toBeTruthy();
6868
expect(proposals.next_primary_hypothesis.failure_signature).toBeTruthy();
6969
expect(proposals.next_primary_hypothesis.pivot_after_failure).toBeTruthy();
70-
});
70+
}, 15000);
7171
});

tests/e2e/research-brain-mock-redirect-selection.test.ts

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -49,10 +49,10 @@ describe("research brain mock redirect selection", () => {
4949
const { workspace, configPath } = await makeWorkspace();
5050
await execFileAsync("python3", [innovationLoopScript, "bootstrap", "--config", configPath, "--workspace", workspace, "--mode", "mock"], { cwd: workspace });
5151
const attemptsPath = path.join(workspace, "experiments", "attempts.jsonl");
52-
await fs.writeFile(attemptsPath, JSON.stringify({ kind: "candidate", family: "objective.loss", decision: "discard", redirect_if_underperforming: "停止重复 objective.loss,转向 repr.feature" }) + "\n", "utf8");
52+
await fs.writeFile(attemptsPath, JSON.stringify({ kind: "candidate", family: "objective.loss", decision: "discard", redirect_if_underperforming: "停止重复 objective.loss,转向 repr.feature", failure_signature: "loss path stalled", causal_metric_path: ["loss_shape", "optimization_stability", "target_metric"] }) + "\n", "utf8");
5353
await execFileAsync("python3", [innovationLoopScript, "tick", "--config", configPath, "--workspace", workspace, "--mode", "mock"], { cwd: workspace });
5454
await execFileAsync("python3", [innovationLoopScript, "tick", "--config", configPath, "--workspace", workspace, "--mode", "mock"], { cwd: workspace });
5555
const proposals = JSON.parse(await fs.readFile(path.join(workspace, "experiments", "proposals", "round-0001.json"), "utf8"));
5656
expect(proposals.next_primary_hypothesis.family).toBe("repr.feature");
57-
});
57+
}, 15000);
5858
});

tests/kb/retrieve-papers.test.ts

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -42,8 +42,11 @@ describe("kb retrieve papers", () => {
4242
expect(result.innovation_briefs.apollo.support_mech_id).toBeTruthy();
4343
expect(result.innovation_briefs.apollo.compatibility_score).toBeGreaterThan(0);
4444
expect(result.innovation_briefs.apollo.lead_unit.mechanism_verb).toBeTruthy();
45+
expect(Array.isArray(result.innovation_briefs.apollo.causal_metric_path)).toBe(true);
46+
expect(result.innovation_briefs.apollo.causal_metric_path.length).toBeGreaterThan(1);
4547
expect(result.innovation_briefs.athena.guardrails.length).toBeGreaterThan(0);
4648
expect(result.selected[0].mechanism_units.length).toBeGreaterThan(0);
49+
expect(result.selected[0].metric_paths.length).toBeGreaterThan(0);
4750
expect(result.selected[0].mechanism_units[0].intervention).not.toContain("作者解决了什么问题");
4851
expect(result.selected[0].mechanism_units[0].intervention).not.toBe("1.");
4952
expect(result.selected[0].mechanism_units[0].action_sentence.startsWith("对")).toBe(true);

tests/orchestration/workflow.test.ts

Lines changed: 10 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -3,8 +3,8 @@ import os from "node:os";
33
import path from "node:path";
44
import { afterEach, describe, expect, it } from "vitest";
55
import loopSpec from "../../fixtures/specs/loop-max-3.json";
6-
import { runGovernedExperimentWorkflow } from "../../src/orchestration/workflow";
76
import type { ExperimentSpec } from "../../src/spec/schema";
7+
import { experiment_run_governed_workflow, experiment_init } from "../../src/tools";
88
import { readJson, readJsonl, writeJson } from "../../src/utils/fs";
99
import { getOrchestrationSummaryPath, getOrchestrationTracePath, getRecoveryCheckpointPath, getWorkspaceConfigPath } from "../../src/utils/paths";
1010

@@ -17,6 +17,7 @@ async function makeWorkspace(): Promise<string> {
1717
await fs.writeFile(path.join(dir, "src", "config.json"), '{"learning_rate":0.1}\n', "utf8");
1818
await fs.writeFile(path.join(dir, "src", "strategy.txt"), "baseline\n", "utf8");
1919
await fs.writeFile(path.join(dir, "src", "module.ts"), "export const variant = 0;\n", "utf8");
20+
await fs.writeFile(path.join(dir, "evaluate.py"), "print(0.93)\n", "utf8");
2021
await writeJson(getWorkspaceConfigPath(dir), { ...loopSpec, workspace_root: dir });
2122
await writeJson(getRecoveryCheckpointPath(dir), {
2223
run_id: "recoverable-run",
@@ -30,11 +31,12 @@ afterEach(async () => {
3031
await Promise.all(tempDirs.splice(0).map((dir) => fs.rm(dir, { recursive: true, force: true })));
3132
});
3233

33-
describe("governed experiment workflow", () => {
34-
it("writes an orchestration trace in specialist order", async () => {
34+
describe("governed experiment workflow bridge", () => {
35+
it("writes an orchestration trace through the python controller authority path", async () => {
3536
const workspace = await makeWorkspace();
3637
const spec: ExperimentSpec = { ...(loopSpec as ExperimentSpec), workspace_root: workspace };
37-
const result = await runGovernedExperimentWorkflow({ workspaceRoot: workspace, spec });
38+
await experiment_init.execute({ workspace_root: workspace, spec });
39+
const result = JSON.parse(await experiment_run_governed_workflow.execute({ workspace_root: workspace }));
3840
const steps = await readJsonl<{ actor: string; status: string; payload?: { execution_mode?: string; raw_excerpt?: string | null } }>(getOrchestrationTracePath(workspace));
3941
const summary = await readJson<{ specialist_audit?: Array<{ actor: string; session_id: string | null; execution_mode: string | null; fallback_reason: string | null; raw_excerpt: string | null }> }>(getOrchestrationSummaryPath(workspace), {});
4042
expect(result.total_iterations).toBeGreaterThan(0);
@@ -47,11 +49,8 @@ describe("governed experiment workflow", () => {
4749
"status_poll.py",
4850
"judge_result.py",
4951
]);
50-
expect(steps.every((step) => typeof step.payload?.execution_mode === "string" || (step.actor === "Sisyphus (Ultraworker)" && step.status === "blocked"))).toBe(true);
51-
expect(steps.every((step) => step.payload?.execution_mode === "fallback")).toBe(true);
52-
expect(steps.some((step) => typeof step.payload?.raw_excerpt === "string" || step.payload?.raw_excerpt === null)).toBe(true);
53-
expect(summary.specialist_audit?.length).toBeGreaterThan(0);
54-
expect(summary.specialist_audit?.every((entry) => typeof entry.execution_mode === "string" || entry.execution_mode === null)).toBe(true);
55-
expect(summary.specialist_audit?.some((entry) => entry.raw_excerpt !== null || entry.fallback_reason !== null || entry.session_id !== null)).toBe(true);
56-
});
52+
expect(result.authority_path).toBe("python_controller");
53+
expect(result.legacy_ts_workflow).toBe(false);
54+
expect(summary.specialist_audit).toBeUndefined();
55+
}, 30000);
5756
});

0 commit comments

Comments
 (0)