Skip to content

Commit 65ebeb4

Browse files
that-github-user, unknown, claude
authored
Add compare command to diff two agents' results side by side (#49)
- `thinktank compare 1 3` shows detailed comparison
- Displays: agent summaries, Jaccard similarity score with bar chart, file overlap (both/only-A/only-B), added line breakdown
- Uses existing diff-parser infrastructure for analysis

Closes #29

Co-authored-by: unknown <that-github-user@github.com>
Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 7454098 commit 65ebeb4

File tree

2 files changed

+114
-0
lines changed

2 files changed

+114
-0
lines changed

src/cli.ts

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import { Command } from "commander";
44
import { apply } from "./commands/apply.js";
5+
import { compare } from "./commands/compare.js";
56
import { list } from "./commands/list.js";
67
import { run } from "./commands/run.js";
78

@@ -65,6 +66,16 @@ program
6566
});
6667
});
6768

69+
// Register the `compare <agentA> <agentB>` subcommand, which hands two numeric
// agent ids to `compare()`.
program
  .command("compare <agentA> <agentB>")
  .description("Compare two agents' results side by side")
  .action(async (agentA: string, agentB: string) => {
    // Accept only plain decimal digits. A bare parseInt would turn "abc" into
    // NaN and silently read "1abc" as 1, comparing an agent the user never
    // asked for.
    const toAgentId = (raw: string): number => {
      if (!/^\d+$/.test(raw)) {
        console.error(
          ` Invalid agent id "${raw}": expected a whole number, e.g. \`thinktank compare 1 3\`.`,
        );
        process.exit(1);
      }
      return Number.parseInt(raw, 10);
    };

    await compare({
      agentA: toAgentId(agentA),
      agentB: toAgentId(agentB),
    });
  });
78+
6879
program
6980
.command("list")
7081
.description("List results from the most recent ensemble run")

src/commands/compare.ts

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
import { readFile } from "node:fs/promises";
2+
import { join } from "node:path";
3+
import pc from "picocolors";
4+
import { diffSimilarity, parseDiff } from "../scoring/diff-parser.js";
5+
import type { AgentResult, EnsembleResult } from "../types.js";
6+
7+
export interface CompareOptions {
8+
agentA: number;
9+
agentB: number;
10+
}
11+
12+
/**
 * Print a comparison of two agents from the stored ensemble result.
 *
 * Reads `.thinktank/latest.json` (resolved against the current working
 * directory) and writes to stdout: a one-line summary per agent, a similarity
 * score with a 20-cell bar, the union of changed files tagged as
 * both / only-A / only-B, and counts of shared and unique added lines.
 *
 * Exits the process with code 1 (after printing to stderr) when the results
 * file cannot be read or parsed, is not shaped like a result, or either agent
 * id is not present in it.
 *
 * @param opts - Ids of the two agents to compare.
 */
export async function compare(opts: CompareOptions): Promise<void> {
  const result = await loadLatestResult();
  const agentA = requireAgent(result, opts.agentA);
  const agentB = requireAgent(result, opts.agentB);

  console.log();
  console.log(pc.bold(` Comparing Agent #${opts.agentA} vs Agent #${opts.agentB}`));
  console.log(pc.dim(" " + "─".repeat(58)));
  console.log();

  // Summary table
  printAgentSummary(agentA, result);
  printAgentSummary(agentB, result);
  console.log();

  // Similarity score
  const sim = diffSimilarity(agentA.diff, agentB.diff);
  const pct = Math.round(sim * 100);
  console.log(` Similarity: ${renderSimilarityBar(pct)} ${pct}%`);
  console.log();

  // File comparison
  const filesA = new Set(agentA.filesChanged);
  const filesB = new Set(agentB.filesChanged);
  const allFiles = new Set([...filesA, ...filesB]);

  console.log(pc.bold(" Files changed:"));
  for (const file of [...allFiles].sort()) {
    const inA = filesA.has(file);
    const inB = filesB.has(file);
    if (inA && inB) {
      console.log(` ${pc.green("both")} ${file}`);
    } else if (inA) {
      console.log(` ${pc.cyan(`#${opts.agentA} only`)} ${file}`);
    } else {
      console.log(` ${pc.yellow(`#${opts.agentB} only`)} ${file}`);
    }
  }
  console.log();

  // Unique lines comparison
  const linesA = addedLineKeys(agentA);
  const linesB = addedLineKeys(agentB);

  let shared = 0;
  for (const line of linesA) {
    if (linesB.has(line)) shared++;
  }
  // Both collections are sets, so whatever is not shared is unique to its side.
  const onlyA = linesA.size - shared;
  const onlyB = linesB.size - shared;

  console.log(pc.bold(" Added lines:"));
  console.log(` Shared: ${shared}`);
  console.log(` Only #${opts.agentA}: ${onlyA}`);
  console.log(` Only #${opts.agentB}: ${onlyB}`);
  console.log();
}

/** Load and minimally validate `.thinktank/latest.json`; exits with code 1 on failure. */
async function loadLatestResult(): Promise<EnsembleResult> {
  let parsed: unknown;
  try {
    const raw = await readFile(join(".thinktank", "latest.json"), "utf-8");
    parsed = JSON.parse(raw);
  } catch {
    // Covers both a missing/unreadable file and invalid JSON.
    console.error(" No results found. Run `thinktank run` first.");
    process.exit(1);
  }

  // JSON.parse can legally return null, a primitive, or an object without
  // `agents`; fail with a readable message instead of a TypeError later on.
  const hasAgents =
    typeof parsed === "object" &&
    parsed !== null &&
    Array.isArray((parsed as { agents?: unknown }).agents);
  if (!hasAgents) {
    console.error(" Results file is malformed. Run `thinktank run` again.");
    process.exit(1);
  }

  return parsed as EnsembleResult;
}

/** Return the agent with the given id, or list the available ids and exit with code 1. */
function requireAgent(result: EnsembleResult, id: number): AgentResult {
  const agent = result.agents.find((a) => a.id === id);
  if (!agent) {
    console.error(` Agent #${id} not found.`);
    console.error(` Available: ${result.agents.map((a) => `#${a.id}`).join(", ")}`);
    process.exit(1);
  }
  return agent;
}

/** Render a fixed-width 20-cell bar for a percentage, one cell per 5 points. */
function renderSimilarityBar(pct: number): string {
  const width = 20;
  const cells = Math.round(pct / 5);
  // Clamp so an out-of-range or NaN score cannot hand String.prototype.repeat
  // a negative count (which throws RangeError) or change the bar's width.
  const filled = Number.isFinite(cells) ? Math.min(width, Math.max(0, cells)) : 0;
  return "█".repeat(filled) + "░".repeat(width - filled);
}

/** Build the set of `path:trimmedLine` keys for every added line in an agent's diff. */
function addedLineKeys(agent: AgentResult): Set<string> {
  return new Set(
    parseDiff(agent.diff).flatMap((f) => f.addedLines.map((l) => `${f.path}:${l.trim()}`)),
  );
}
95+
96+
/**
 * Print a one-line summary for a single agent: id, a "(recommended)" marker
 * when `result.recommended` equals the agent's id, status, test outcome,
 * added/removed line counts, and the number of changed files.
 *
 * The test outcome is "pass" or "fail" taken from the entry in `result.tests`
 * whose `agentId` matches, or "n/a" when there is no such entry.
 *
 * @param agent - The agent to summarize.
 * @param result - The ensemble result the agent belongs to.
 */
function printAgentSummary(agent: AgentResult, result: EnsembleResult): void {
  const test = result.tests.find((t) => t.agentId === agent.id);
  const testStr = test ? (test.passed ? pc.green("pass") : pc.red("fail")) : pc.dim("n/a");
  const rec = result.recommended === agent.id ? pc.cyan(" (recommended)") : "";

  // Avoid the ungrammatical "1 files" when exactly one file changed.
  const fileCount = agent.filesChanged.length;
  const fileWord = fileCount === 1 ? "file" : "files";

  console.log(
    ` Agent #${agent.id}${rec}: ${agent.status} | tests: ${testStr} | +${agent.linesAdded}/-${agent.linesRemoved} | ${fileCount} ${fileWord}`,
  );
}

0 commit comments

Comments
 (0)