Skip to content

Commit b17f1e7

Browse files
committed
wip
1 parent 19a6c90 commit b17f1e7

File tree

4 files changed

+131
-64
lines changed

4 files changed

+131
-64
lines changed

CLAUDE.md

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -57,15 +57,18 @@ Generate condensed versions of the documentation to reduce context size:
5757
Verify the accuracy of distilled summaries against original documentation:
5858

5959
- `pnpm verify-distilled` - Verify all distilled summaries for accuracy
60-
- `pnpm verify-distilled:dry-run` - Preview what would be verified without making API calls
61-
- `pnpm verify-distilled:debug` - Verify only 2 sections for debugging
62-
63-
The verification script:
64-
1. Loads `distilled.json` containing summaries and original content
65-
2. Uses the Anthropic Batch API to send each summary and original content to Claude
66-
3. Claude evaluates whether the summary is accurate or contains errors/omissions
67-
4. Generates `distilled-verification.json` with results (ACCURATE/NOT_ACCURATE) and reasoning
68-
5. Outputs statistics about accuracy rates
60+
- `pnpm show-verification-errors` - Display all sections that failed verification
61+
62+
The verification workflow:
63+
1. Run `pnpm verify-distilled` to verify all distilled summaries
64+
- Loads `distilled.json` containing summaries and original content
65+
- Uses the Anthropic Batch API to send each summary and original content to Claude
66+
- Claude evaluates whether the summary is accurate or contains errors/omissions
67+
- Generates `distilled-verification.json` with results (ACCURATE/NOT_ACCURATE) and reasoning
68+
- Outputs statistics about accuracy rates
69+
2. Run `pnpm show-verification-errors` to see a detailed list of all sections marked NOT_ACCURATE
70+
- Displays each problematic section with its reasoning
71+
- Shows summary statistics
6972

7073
**Note:** All documentation generation and verification commands require `ANTHROPIC_API_KEY` to be set in `packages/mcp-server/.env`
7174

packages/mcp-server/package.json

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,7 @@
1717
"generate-distilled:dry-run": "node --import node-resolve-ts/register scripts/generate-summaries.ts --prompt-type distilled --dry-run",
1818
"generate-distilled:debug": "DEBUG_MODE=1 node --import node-resolve-ts/register scripts/generate-summaries.ts --prompt-type distilled",
1919
"verify-distilled": "node --import node-resolve-ts/register scripts/verify-distilled.ts",
20-
"verify-distilled:dry-run": "node --import node-resolve-ts/register scripts/verify-distilled.ts --dry-run",
21-
"verify-distilled:debug": "DEBUG_MODE=1 node --import node-resolve-ts/register scripts/verify-distilled.ts"
20+
"show-verification-errors": "node --import node-resolve-ts/register scripts/show-verification-errors.ts"
2221
},
2322
"exports": {
2423
".": "./src/index.ts",
Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
#!/usr/bin/env node
2+
import { readFile } from 'node:fs/promises';
3+
import path from 'node:path';
4+
import { fileURLToPath } from 'node:url';
5+
import * as v from 'valibot';
6+
7+
const current_filename = fileURLToPath(import.meta.url);
8+
const current_dirname = path.dirname(current_filename);
9+
10+
/** Verdict recorded for one section in the verification results file. */
interface VerificationResult {
  /** Identifier of the section this verdict belongs to. */
  slug: string;

  /** Outcome of the check: either `ACCURATE` or `NOT_ACCURATE`. */
  status: 'ACCURATE' | 'NOT_ACCURATE';

  /** Free-text explanation accompanying the verdict. */
  reasoning: string;
}
15+
16+
/** Top-level shape of the verification results file read by this script. */
interface VerificationOutput {
  /** Timestamp string; it is passed to `new Date(...)` when the summary is printed. */
  generated_at: string;

  /** Name of the model, printed verbatim in the summary header. */
  model: string;

  /** Count reported as "Total Sections" in the summary. */
  total_sections: number;

  /** Count reported as "Verified"; also the denominator for the percentages. */
  verified_sections: number;

  /** Number of results reported as accurate. */
  accurate_count: number;

  /** Number of results reported as not accurate. */
  not_accurate_count: number;

  /** Per-section verdicts. */
  results: Array<VerificationResult>;
}
25+
26+
/** Accepts exactly the two verdict literals a result may carry. */
const verification_status_schema = v.union([v.literal('ACCURATE'), v.literal('NOT_ACCURATE')]);

/** Schema for a single entry of the `results` array. */
const verification_result_schema = v.object({
  slug: v.string(),
  status: verification_status_schema,
  reasoning: v.string(),
});

/**
 * Valibot schema used to validate the parsed contents of
 * `distilled-verification.json` before any field is read.
 */
const verification_output_schema = v.object({
  generated_at: v.string(),
  model: v.string(),
  total_sections: v.number(),
  verified_sections: v.number(),
  accurate_count: v.number(),
  not_accurate_count: v.number(),
  results: v.array(verification_result_schema),
});
41+
42+
/**
 * Formats `count` as a percentage of `total` with one decimal place.
 *
 * Returns `'0.0'` when `total` is zero so the summary never prints `NaN`
 * or `Infinity` for a results file with no verified sections.
 */
function format_percentage(count: number, total: number): string {
  if (total === 0) {
    return '0.0';
  }
  return ((count / total) * 100).toFixed(1);
}

/**
 * Reads `../src/distilled-verification.json` (relative to this script),
 * validates it against `verification_output_schema`, prints a summary of the
 * run and then lists every section whose status is `NOT_ACCURATE`.
 *
 * Exits the process with code 1 when the file cannot be read, is not valid
 * JSON, or does not match the expected schema.
 */
async function main(): Promise<void> {
  const verification_path = path.join(current_dirname, '../src/distilled-verification.json');

  console.log('📂 Reading verification results...\n');

  let content: string;
  try {
    content = await readFile(verification_path, 'utf-8');
  } catch (error) {
    // Only a missing file means "verification has not been run yet"; any other
    // failure (permissions, I/O) is reported as-is so it is not misdiagnosed.
    const is_missing = error instanceof Error && 'code' in error && error.code === 'ENOENT';
    if (is_missing) {
      console.error('❌ Error: Could not find distilled-verification.json');
      console.error('Please run `pnpm verify-distilled` first to generate the file.');
    } else {
      console.error(`❌ Error: Could not read ${verification_path}`);
      console.error(error instanceof Error ? error.message : String(error));
    }
    process.exit(1);
  }

  // Keep the parsed value as `unknown` until the schema has vetted it.
  let data: unknown;
  try {
    data = JSON.parse(content);
  } catch (error) {
    console.error('❌ Error: distilled-verification.json is not valid JSON');
    console.error(error instanceof Error ? error.message : String(error));
    process.exit(1);
  }

  const validated = v.safeParse(verification_output_schema, data);

  if (!validated.success) {
    console.error('❌ Error: Invalid verification file format');
    console.error(JSON.stringify(validated.issues, null, 2));
    process.exit(1);
  }

  const verification_data = validated.output;

  // Filter for NOT_ACCURATE results
  const not_accurate = verification_data.results.filter((r) => r.status === 'NOT_ACCURATE');

  const divider = '═'.repeat(80);
  const accurate_pct = format_percentage(
    verification_data.accurate_count,
    verification_data.verified_sections,
  );
  const not_accurate_pct = format_percentage(
    verification_data.not_accurate_count,
    verification_data.verified_sections,
  );

  // Print header
  console.log('📊 Verification Results Summary');
  console.log(divider);
  console.log(`Generated: ${new Date(verification_data.generated_at).toLocaleString()}`);
  console.log(`Model: ${verification_data.model}`);
  console.log(`Total Sections: ${verification_data.total_sections}`);
  console.log(`Verified: ${verification_data.verified_sections}`);
  console.log(`✅ Accurate: ${verification_data.accurate_count} (${accurate_pct}%)`);
  console.log(`❌ Not Accurate: ${verification_data.not_accurate_count} (${not_accurate_pct}%)`);
  console.log(divider);

  if (not_accurate.length === 0) {
    console.log('\n🎉 All sections are accurate! No issues found.');
    return;
  }

  // Print all NOT_ACCURATE entries
  console.log(`\n❌ NOT ACCURATE SECTIONS (${not_accurate.length}):\n`);

  for (const [index, result] of not_accurate.entries()) {
    console.log(`${index + 1}. ${result.slug}`);
    console.log(` Reasoning: ${result.reasoning}`);
    console.log('');
  }

  console.log(divider);
  console.log(
    `\nFound ${not_accurate.length} section(s) that need review or regeneration.`,
  );
}
105+
106+
/** Logs a rejection that escaped `main` and exits the process with code 1. */
const report_fatal_error = (reason: unknown): never => {
  console.error('❌ Fatal error:', reason);
  process.exit(1);
};

// Script entry point: run `main` and route any unhandled rejection to the reporter.
main().catch(report_fatal_error);

packages/mcp-server/scripts/verify-distilled.ts

Lines changed: 9 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -3,17 +3,11 @@ import 'dotenv/config';
33
import { writeFile, mkdir } from 'node:fs/promises';
44
import path from 'node:path';
55
import { fileURLToPath } from 'node:url';
6-
import { Command } from 'commander';
76
import { AnthropicProvider } from '../src/lib/anthropic.ts';
87
import type { AnthropicBatchRequest } from '../src/lib/schemas.ts';
98
import distilled_data from '../src/distilled.json' with { type: 'json' };
109
import * as v from 'valibot';
1110

12-
interface CliOptions {
13-
dryRun: boolean;
14-
debug: boolean;
15-
}
16-
1711
interface VerificationResult {
1812
slug: string;
1913
status: 'ACCURATE' | 'NOT_ACCURATE';
@@ -49,17 +43,6 @@ const verification_output_schema = v.object({
4943
),
5044
});
5145

52-
const program = new Command();
53-
54-
program
55-
.name('verify-distilled')
56-
.description(
57-
'Verify the accuracy of distilled summaries by comparing them to original documentation',
58-
)
59-
.version('1.0.0')
60-
.option('-d, --dry-run', 'Show what would be verified without making API calls', false)
61-
.option('--debug', 'Debug mode: process only 2 sections', false);
62-
6346
const VERIFICATION_PROMPT = `You are tasked with verifying the accuracy of a distilled/condensed version of documentation against the original content.
6447
6548
Your task:
@@ -115,44 +98,16 @@ function parse_verification_response(text: string): {
11598
}
11699

117100
async function main() {
118-
program.parse();
119-
const options = program.opts<CliOptions>();
120-
121-
const debug = options.debug || process.env.DEBUG_MODE === '1';
122-
123101
console.log('🔍 Starting distilled verification...\n');
124102

125-
if (options.dryRun) {
126-
console.log('🔍 DRY RUN MODE - No API calls will be made\n');
127-
}
128-
if (debug) {
129-
console.log('🐛 DEBUG MODE - Will process only 2 sections\n');
130-
}
131-
132103
const output_path = path.join(current_dirname, '../src/distilled-verification.json');
133104

134105
// Load distilled data
135106
console.log('📂 Loading distilled.json...');
136107
const { summaries, content } = distilled_data;
137108

138-
const sections_to_verify = Object.keys(summaries);
139-
console.log(`Found ${sections_to_verify.length} sections to verify`);
140-
141-
// Debug mode: limit to 2 sections
142-
let sections = sections_to_verify;
143-
if (debug) {
144-
console.log('\n🐛 Processing only 2 sections for debugging');
145-
sections = sections_to_verify.slice(0, 2);
146-
}
147-
148-
console.log(`\n📋 Will verify ${sections.length} sections`);
149-
150-
// Dry run mode: exit before API calls
151-
if (options.dryRun) {
152-
console.log('\n🔍 DRY RUN complete - no changes were made');
153-
console.log(`Would have verified ${sections.length} sections`);
154-
return;
155-
}
109+
const sections = Object.keys(summaries);
110+
console.log(`Found ${sections.length} sections to verify\n`);
156111

157112
// Check for API key
158113
const api_key = process.env.ANTHROPIC_API_KEY;
@@ -164,7 +119,7 @@ async function main() {
164119
}
165120

166121
// Initialize Anthropic API
167-
console.log('\n🤖 Initializing Anthropic API...');
122+
console.log('🤖 Initializing Anthropic API...');
168123
const anthropic = new AnthropicProvider('claude-sonnet-4-5-20250929', api_key);
169124

170125
// Prepare batch requests
@@ -177,7 +132,7 @@ async function main() {
177132
custom_id: `verify-${index}`,
178133
params: {
179134
model: anthropic.get_model_identifier(),
180-
max_tokens: 4096, // Increased to allow full responses
135+
max_tokens: 4096,
181136
messages: [
182137
{
183138
role: 'user',
@@ -290,7 +245,7 @@ async function main() {
290245
const output_data: VerificationOutput = {
291246
generated_at: new Date().toISOString(),
292247
model: 'claude-sonnet-4-5-20250929',
293-
total_sections: sections_to_verify.length,
248+
total_sections: sections.length,
294249
verified_sections: sections.length,
295250
accurate_count,
296251
not_accurate_count,
@@ -307,7 +262,7 @@ async function main() {
307262

308263
// Print summary
309264
console.log('\n📊 Verification Summary:');
310-
console.log(` Total sections: ${sections_to_verify.length}`);
265+
console.log(` Total sections: ${sections.length}`);
311266
console.log(` Verified sections: ${sections.length}`);
312267
console.log(
313268
` ✅ Accurate: ${accurate_count} (${((accurate_count / sections.length) * 100).toFixed(1)}%)`,
@@ -317,16 +272,17 @@ async function main() {
317272
);
318273

319274
if (not_accurate_count > 0) {
320-
console.log('\n⚠️ Sections with issues:');
275+
console.log('\n⚠️ Sections with issues (first 10):');
321276
verification_results
322277
.filter((r) => r.status === 'NOT_ACCURATE')
323-
.slice(0, 10) // Show first 10
278+
.slice(0, 10)
324279
.forEach((r) => {
325280
console.log(` - ${r.slug}: ${r.reasoning}`);
326281
});
327282
if (not_accurate_count > 10) {
328283
console.log(` ... and ${not_accurate_count - 10} more`);
329284
}
285+
console.log('\n💡 Run `pnpm show-verification-errors` to see all issues');
330286
}
331287

332288
console.log(`\n✅ Results written to: ${output_path}`);

0 commit comments

Comments
 (0)