Skip to content

Commit 7ada8e3

Browse files
that-github-userunknownclaude
authored
Add schema validation for .thinktank result files (#126)
parseAndValidateResult() checks required fields before use. All commands that load result files now validate and skip/warn on invalid data instead of crashing. 24 new tests for valid/invalid schemas. Generated by thinktank Opus (5 agents, 4 pass, Copeland: #1 at +3). Closes #69 Co-authored-by: unknown <that-github-user@github.com> Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 07bc125 commit 7ada8e3

File tree

6 files changed

+248
-12
lines changed

6 files changed

+248
-12
lines changed

src/commands/apply.ts

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import { promisify } from "node:util";
55
import pc from "picocolors";
66
import type { EnsembleResult } from "../types.js";
77
import { cleanupBranches, getRepoRoot, removeWorktree } from "../utils/git.js";
8+
import { parseAndValidateResult } from "../utils/schema.js";
89

910
const exec = promisify(execFile);
1011

@@ -71,9 +72,15 @@ export async function apply(opts: ApplyOptions): Promise<void> {
7172
let result: EnsembleResult;
7273
try {
7374
const raw = await readFile(join(".thinktank", "latest.json"), "utf-8");
74-
result = JSON.parse(raw);
75-
} catch {
76-
console.error(" No results found. Run `thinktank run` first.");
75+
result = parseAndValidateResult(raw, "latest.json");
76+
} catch (err) {
77+
const msg = (err as Error).message;
78+
if (msg.includes("Invalid result file")) {
79+
console.error(` ${msg}`);
80+
console.error(" The result file may be corrupted. Try running `thinktank run` again.");
81+
} else {
82+
console.error(" No results found. Run `thinktank run` first.");
83+
}
7784
process.exit(1);
7885
}
7986

src/commands/evaluate.ts

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import { join } from "node:path";
33
import pc from "picocolors";
44
import { analyzeConvergence, copelandRecommend, recommend } from "../scoring/convergence.js";
55
import type { EnsembleResult } from "../types.js";
6+
import { parseAndValidateResult } from "../utils/schema.js";
67

78
interface RunEvaluation {
89
file: string;
@@ -83,9 +84,9 @@ export async function evaluate(): Promise<void> {
8384
for (const file of files) {
8485
try {
8586
const raw = await readFile(join(".thinktank", file), "utf-8");
86-
runs.push(JSON.parse(raw) as EnsembleResult);
87-
} catch {
88-
// skip malformed
87+
runs.push(parseAndValidateResult(raw, file));
88+
} catch (err) {
89+
console.warn(` Skipping ${file}: ${(err as Error).message}`);
8990
}
9091
}
9192

src/commands/list.ts

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import { join } from "node:path";
33
import pc from "picocolors";
44
import type { EnsembleResult } from "../types.js";
55
import { displayResults, padRight } from "../utils/display.js";
6+
import { parseAndValidateResult } from "../utils/schema.js";
67

78
export interface RunSummary {
89
runNumber: number;
@@ -48,9 +49,9 @@ export async function loadAllRuns(): Promise<{ filename: string; result: Ensembl
4849
for (const file of files) {
4950
try {
5051
const raw = await readFile(join(".thinktank", file), "utf-8");
51-
runs.push({ filename: file, result: JSON.parse(raw) as EnsembleResult });
52-
} catch {
53-
// skip malformed files
52+
runs.push({ filename: file, result: parseAndValidateResult(raw, file) });
53+
} catch (err) {
54+
console.warn(` Skipping ${file}: ${(err as Error).message}`);
5455
}
5556
}
5657
return runs;

src/commands/stats.ts

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ import { readdir, readFile } from "node:fs/promises";
22
import { join } from "node:path";
33
import pc from "picocolors";
44
import type { EnsembleResult } from "../types.js";
5+
import { parseAndValidateResult } from "../utils/schema.js";
56

67
export interface StatsOptions {
78
model?: string;
@@ -62,9 +63,9 @@ export async function stats(opts: StatsOptions = {}): Promise<void> {
6263
for (const file of files) {
6364
try {
6465
const raw = await readFile(join(".thinktank", file), "utf-8");
65-
allResults.push(JSON.parse(raw) as EnsembleResult);
66-
} catch {
67-
// skip malformed files
66+
allResults.push(parseAndValidateResult(raw, file));
67+
} catch (err) {
68+
console.warn(` Skipping ${file}: ${(err as Error).message}`);
6869
}
6970
}
7071

src/utils/schema.test.ts

Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
import assert from "node:assert/strict";
2+
import { describe, it } from "node:test";
3+
import { parseAndValidateResult, validateResult } from "./schema.js";
4+
5+
function makeValidResult(): Record<string, unknown> {
6+
return {
7+
prompt: "fix the bug",
8+
model: "sonnet",
9+
timestamp: "2025-01-01T00:00:00Z",
10+
scoring: "weighted",
11+
agents: [
12+
{
13+
id: 1,
14+
worktree: "/tmp/wt-1",
15+
status: "success",
16+
exitCode: 0,
17+
duration: 30,
18+
output: "done",
19+
diff: "--- a/file\n+++ b/file",
20+
filesChanged: ["file.ts"],
21+
linesAdded: 1,
22+
linesRemoved: 0,
23+
},
24+
],
25+
tests: [{ agentId: 1, passed: true, output: "ok", exitCode: 0 }],
26+
convergence: [{ agents: [1], similarity: 1, filesChanged: ["file.ts"], description: "group" }],
27+
recommended: 1,
28+
scores: [{ agentId: 1, testPoints: 10, convergencePoints: 5, diffSizePoints: 3, total: 18 }],
29+
};
30+
}
31+
32+
describe("validateResult", () => {
33+
it("returns null for a valid result", () => {
34+
assert.equal(validateResult(makeValidResult()), null);
35+
});
36+
37+
it("returns null when recommended is null", () => {
38+
const result = makeValidResult();
39+
result.recommended = null;
40+
assert.equal(validateResult(result), null);
41+
});
42+
43+
it("returns null with optional copelandScores", () => {
44+
const result = makeValidResult();
45+
result.copelandScores = [];
46+
assert.equal(validateResult(result), null);
47+
});
48+
49+
it("rejects null", () => {
50+
assert.match(validateResult(null)!, /non-null object/);
51+
});
52+
53+
it("rejects non-object", () => {
54+
assert.match(validateResult("string")!, /non-null object/);
55+
});
56+
57+
it("rejects missing prompt", () => {
58+
const result = makeValidResult();
59+
delete result.prompt;
60+
assert.match(validateResult(result)!, /prompt/);
61+
});
62+
63+
it("rejects non-string prompt", () => {
64+
const result = makeValidResult();
65+
result.prompt = 42;
66+
assert.match(validateResult(result)!, /prompt/);
67+
});
68+
69+
it("rejects missing model", () => {
70+
const result = makeValidResult();
71+
delete result.model;
72+
assert.match(validateResult(result)!, /model/);
73+
});
74+
75+
it("rejects missing timestamp", () => {
76+
const result = makeValidResult();
77+
delete result.timestamp;
78+
assert.match(validateResult(result)!, /timestamp/);
79+
});
80+
81+
it("rejects invalid scoring value", () => {
82+
const result = makeValidResult();
83+
result.scoring = "invalid";
84+
assert.match(validateResult(result)!, /scoring/);
85+
});
86+
87+
it("rejects missing scoring", () => {
88+
const result = makeValidResult();
89+
delete result.scoring;
90+
assert.match(validateResult(result)!, /scoring/);
91+
});
92+
93+
it("rejects non-array agents", () => {
94+
const result = makeValidResult();
95+
result.agents = "not-array";
96+
assert.match(validateResult(result)!, /agents/);
97+
});
98+
99+
it("rejects missing agents", () => {
100+
const result = makeValidResult();
101+
delete result.agents;
102+
assert.match(validateResult(result)!, /agents/);
103+
});
104+
105+
it("rejects non-array tests", () => {
106+
const result = makeValidResult();
107+
result.tests = {};
108+
assert.match(validateResult(result)!, /tests/);
109+
});
110+
111+
it("rejects non-array convergence", () => {
112+
const result = makeValidResult();
113+
result.convergence = "nope";
114+
assert.match(validateResult(result)!, /convergence/);
115+
});
116+
117+
it("rejects non-number non-null recommended", () => {
118+
const result = makeValidResult();
119+
result.recommended = "bad";
120+
assert.match(validateResult(result)!, /recommended/);
121+
});
122+
123+
it("rejects missing recommended", () => {
124+
const result = makeValidResult();
125+
delete result.recommended;
126+
assert.match(validateResult(result)!, /recommended/);
127+
});
128+
129+
it("rejects missing scores", () => {
130+
const result = makeValidResult();
131+
delete result.scores;
132+
assert.match(validateResult(result)!, /scores/);
133+
});
134+
135+
it("rejects non-array scores", () => {
136+
const result = makeValidResult();
137+
result.scores = "bad";
138+
assert.match(validateResult(result)!, /scores/);
139+
});
140+
141+
it("accepts empty arrays for agents, tests, convergence, scores", () => {
142+
const result = makeValidResult();
143+
result.agents = [];
144+
result.tests = [];
145+
result.convergence = [];
146+
result.scores = [];
147+
result.recommended = null;
148+
assert.equal(validateResult(result), null);
149+
});
150+
});
151+
152+
describe("parseAndValidateResult", () => {
153+
it("parses and returns a valid result", () => {
154+
const json = JSON.stringify(makeValidResult());
155+
const result = parseAndValidateResult(json, "test.json");
156+
assert.equal(result.prompt, "fix the bug");
157+
assert.equal(result.model, "sonnet");
158+
});
159+
160+
it("throws on invalid JSON", () => {
161+
assert.throws(() => parseAndValidateResult("{bad", "test.json"), /JSON/i);
162+
});
163+
164+
it("throws on valid JSON but invalid schema", () => {
165+
assert.throws(() => parseAndValidateResult('{"foo": 1}', "test.json"), /Invalid result file/);
166+
});
167+
168+
it("includes filename in error message", () => {
169+
assert.throws(() => parseAndValidateResult("{}", "run-2025.json"), /run-2025\.json/);
170+
});
171+
});

src/utils/schema.ts

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
import type { EnsembleResult } from "../types.js";
2+
3+
/**
4+
* Validate that a parsed JSON object has the required shape of an EnsembleResult.
5+
* Returns null on success, or a descriptive error string on failure.
6+
*/
7+
export function validateResult(data: unknown): string | null {
8+
if (data === null || typeof data !== "object") {
9+
return "result must be a non-null object";
10+
}
11+
12+
const obj = data as Record<string, unknown>;
13+
14+
if (typeof obj.prompt !== "string") {
15+
return "missing or invalid field: prompt (expected string)";
16+
}
17+
if (typeof obj.model !== "string") {
18+
return "missing or invalid field: model (expected string)";
19+
}
20+
if (typeof obj.timestamp !== "string") {
21+
return "missing or invalid field: timestamp (expected string)";
22+
}
23+
if (obj.scoring !== "weighted" && obj.scoring !== "copeland") {
24+
return 'missing or invalid field: scoring (expected "weighted" or "copeland")';
25+
}
26+
if (!Array.isArray(obj.agents)) {
27+
return "missing or invalid field: agents (expected array)";
28+
}
29+
if (!Array.isArray(obj.tests)) {
30+
return "missing or invalid field: tests (expected array)";
31+
}
32+
if (!Array.isArray(obj.convergence)) {
33+
return "missing or invalid field: convergence (expected array)";
34+
}
35+
if (obj.recommended !== null && typeof obj.recommended !== "number") {
36+
return "missing or invalid field: recommended (expected number or null)";
37+
}
38+
if (!Array.isArray(obj.scores)) {
39+
return "missing or invalid field: scores (expected array)";
40+
}
41+
42+
return null;
43+
}
44+
45+
/**
46+
* Parse JSON and validate as EnsembleResult. Returns the result or throws with a descriptive message.
47+
*/
48+
export function parseAndValidateResult(json: string, filename: string): EnsembleResult {
49+
const data = JSON.parse(json);
50+
const error = validateResult(data);
51+
if (error) {
52+
throw new Error(`Invalid result file ${filename}: ${error}`);
53+
}
54+
return data as EnsembleResult;
55+
}

0 commit comments

Comments
 (0)