wip

khromov · khromov · commit f27a7778c782 · 2025-10-12T23:49:16.000+02:00
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -34,6 +34,41 @@ pnpm dev
 - `pnpm db:migrate` - Run migrations
 - `pnpm db:studio` - Open Drizzle Studio
 
+### Documentation Generation Commands
+
+#### Generate Use Case Summaries
+
+Generate short descriptions of when each documentation section would be useful:
+
+- `pnpm generate-summaries` - Generate use case summaries for all sections
+- `pnpm generate-summaries:dry-run` - Preview what would be generated without making API calls
+- `pnpm generate-summaries:debug` - Process only 2 sections for debugging
+
+#### Generate Distilled Documentation
+
+Generate condensed versions of the documentation to reduce context size:
+
+- `pnpm generate-distilled` - Generate distilled versions for all sections
+- `pnpm generate-distilled:dry-run` - Preview what would be generated without making API calls
+- `pnpm generate-distilled:debug` - Process only 2 sections for debugging
+
+#### Verify Distilled Documentation
+
+Verify the accuracy of distilled summaries against original documentation:
+
+- `pnpm verify-distilled` - Verify all distilled summaries for accuracy
+- `pnpm verify-distilled:dry-run` - Preview what would be verified without making API calls
+- `pnpm verify-distilled:debug` - Verify only 2 sections for debugging
+
+The verification script:
+1. Loads `distilled.json` containing summaries and original content
+2. Uses the Anthropic Batch API to send each summary and original content to Claude
+3. Asks Claude to evaluate whether each summary is accurate or contains errors/omissions
+4. Generates `distilled-verification.json` with results (ACCURATE/NOT_ACCURATE) and reasoning
+5. Outputs statistics about accuracy rates
+
+**Note:** All documentation generation and verification commands require `ANTHROPIC_API_KEY` to be set in `packages/mcp-server/.env`.
+
 ## Architecture
 
 ### MCP Server Implementation
@@ -85,6 +120,7 @@ Required environment variables:
 
 - `DATABASE_URL`: SQLite database path (default: `file:test.db`)
 - `VOYAGE_API_KEY`: API key for embeddings support (optional)
+- `ANTHROPIC_API_KEY`: API key for documentation generation and verification (required for doc scripts)
 
 When connected to the svelte-llm MCP server, you have access to comprehensive Svelte 5 and SvelteKit documentation. Here's how to use the available tools effectively:
 
diff --git a/package.json b/package.json
@@ -22,6 +22,9 @@
 		"generate-distilled": "pnpm --filter @sveltejs/mcp-server run generate-distilled",
 		"generate-distilled:dry-run": "pnpm --filter @sveltejs/mcp-server run generate-distilled:dry-run",
 		"generate-distilled:debug": "pnpm --filter @sveltejs/mcp-server run generate-distilled:debug",
+		"verify-distilled": "pnpm --filter @sveltejs/mcp-server run verify-distilled",
+		"verify-distilled:dry-run": "pnpm --filter @sveltejs/mcp-server run verify-distilled:dry-run",
+		"verify-distilled:debug": "pnpm --filter @sveltejs/mcp-server run verify-distilled:debug",
 		"generate-prompt-docs": "node --import node-resolve-ts/register scripts/update-docs-prompts.ts",
 		"release": "pnpm --filter @sveltejs/mcp run build && changeset publish",
 		"changeset:version": "changeset version && pnpm --filter @sveltejs/mcp run update:version && git add --all"
diff --git a/packages/mcp-server/package.json b/packages/mcp-server/package.json
@@ -15,7 +15,10 @@
 		"generate-summaries:debug": "DEBUG_MODE=1 node --import node-resolve-ts/register scripts/generate-summaries.ts",
 		"generate-distilled": "node --import node-resolve-ts/register scripts/generate-summaries.ts --prompt-type distilled",
 		"generate-distilled:dry-run": "node --import node-resolve-ts/register scripts/generate-summaries.ts --prompt-type distilled --dry-run",
-		"generate-distilled:debug": "DEBUG_MODE=1 node --import node-resolve-ts/register scripts/generate-summaries.ts --prompt-type distilled"
+		"generate-distilled:debug": "DEBUG_MODE=1 node --import node-resolve-ts/register scripts/generate-summaries.ts --prompt-type distilled",
+		"verify-distilled": "node --import node-resolve-ts/register scripts/verify-distilled.ts",
+		"verify-distilled:dry-run": "node --import node-resolve-ts/register scripts/verify-distilled.ts --dry-run",
+		"verify-distilled:debug": "DEBUG_MODE=1 node --import node-resolve-ts/register scripts/verify-distilled.ts"
 	},
 	"exports": {
 		".": "./src/index.ts",
diff --git a/packages/mcp-server/scripts/verify-distilled.test.ts b/packages/mcp-server/scripts/verify-distilled.test.ts
@@ -0,0 +1,295 @@
+import { describe, it, expect, beforeEach, afterEach } from 'vitest';
+import { mkdir, readFile, rm, writeFile } from 'node:fs/promises';
+import path from 'node:path';
+import { fileURLToPath } from 'node:url';
+
+const current_filename = fileURLToPath(import.meta.url);
+const current_dirname = path.dirname(current_filename);
+const test_output_dir = path.join(current_dirname, '../test-output');
+const test_verification_path = path.join(test_output_dir, 'distilled-verification.json');
+
+interface VerificationResult { // Verdict recorded for one documentation section
+	slug: string; // Section identifier, e.g. 'svelte/overview'
+	status: 'ACCURATE' | 'NOT_ACCURATE'; // Outcome of checking the section's distilled summary
+	reasoning: string; // Explanation accompanying the status
+}
+
+interface VerificationOutput { // Shape of the report these tests serialize to distilled-verification.json
+	generated_at: string; // Timestamp string; the helper below fills it with Date#toISOString()
+	model: string; // Model identifier, e.g. 'claude-sonnet-4-5-20250929'
+	total_sections: number; // Section total; the helper below defaults it to results.length
+	verified_sections: number; // Number of entries in `results`
+	accurate_count: number; // Entries whose status is 'ACCURATE'
+	not_accurate_count: number; // Entries whose status is 'NOT_ACCURATE'
+	results: VerificationResult[]; // Per-section verdicts
+}
+
+/**
+ * Assembles a verification report from a list of per-section verdicts.
+ *
+ * @param results - Verdicts to include in the report.
+ * @param total_sections - Overall section count; falls back to `results.length`.
+ * @returns The report, stamped with the current time and a fixed model id.
+ */
+function create_verification_output(
+	results: VerificationResult[],
+	total_sections: number = results.length,
+): VerificationOutput {
+	// Each status is tallied on its own, so an entry with any other status lands in neither count.
+	const count_with_status = (wanted: VerificationResult['status']): number =>
+		results.reduce((running, entry) => (entry.status === wanted ? running + 1 : running), 0);
+
+	const report: VerificationOutput = {
+		generated_at: new Date().toISOString(),
+		model: 'claude-sonnet-4-5-20250929',
+		total_sections,
+		verified_sections: results.length,
+		accurate_count: count_with_status('ACCURATE'),
+		not_accurate_count: count_with_status('NOT_ACCURATE'),
+		results,
+	};
+	return report;
+}
+
+describe('verify-distilled', () => {
+	// Every test starts with the scratch directory in place.
+	beforeEach(async () => {
+		await mkdir(test_output_dir, { recursive: true });
+	});
+
+	// Best-effort teardown: a failure while removing the scratch directory is ignored.
+	afterEach(async () => {
+		await rm(test_output_dir, { recursive: true, force: true }).catch(() => {
+			// Ignore cleanup errors
+		});
+	});
+
+	describe('verification result structure', () => {
+		// Shorthand for building one verdict entry.
+		const verdict = (
+			slug: string,
+			status: VerificationResult['status'],
+			reasoning: string,
+		): VerificationResult => ({ slug, status, reasoning });
+
+		it('should create valid verification output structure', () => {
+			const output = create_verification_output([
+				verdict('svelte/overview', 'ACCURATE', 'Summary accurately reflects original content'),
+			]);
+
+			// Every top-level field of the report must be present.
+			const expected_keys = [
+				'generated_at',
+				'model',
+				'total_sections',
+				'verified_sections',
+				'accurate_count',
+				'not_accurate_count',
+				'results',
+			];
+			for (const key of expected_keys) {
+				expect(output).toHaveProperty(key);
+			}
+			expect(output.results).toHaveLength(1);
+		});
+
+		it('should correctly count accurate vs not accurate results', () => {
+			const { accurate_count, not_accurate_count, verified_sections } =
+				create_verification_output([
+					verdict('svelte/overview', 'ACCURATE', 'Good summary'),
+					verdict('svelte/$state', 'NOT_ACCURATE', 'Missing critical information'),
+					verdict('svelte/$effect', 'ACCURATE', 'Well condensed'),
+				]);
+
+			expect(accurate_count).toBe(2);
+			expect(not_accurate_count).toBe(1);
+			expect(verified_sections).toBe(3);
+		});
+	});
+
+	describe('file operations', () => {
+		// Round-trips a report through the scratch directory: serialize it to
+		// distilled-verification.json, read it back, and check the parsed content.
+		it('should write verification results to JSON file', async () => {
+			const results: VerificationResult[] = [
+				{
+					slug: 'svelte/overview',
+					status: 'ACCURATE',
+					reasoning: 'Summary is accurate',
+				},
+			];
+
+			const output = create_verification_output(results);
+
+			await writeFile(test_verification_path, JSON.stringify(output, null, 2), 'utf-8');
+
+			// Read the file back with the statically imported `readFile`; a dynamic
+			// `import('node:fs/promises')` is redundant because that module is already
+			// imported at the top of the file.
+			const content = await readFile(test_verification_path, 'utf-8');
+			// `JSON.parse` returns `any`; annotate it so the assertions below are type-checked.
+			const parsed: VerificationOutput = JSON.parse(content);
+
+			expect(parsed.results).toHaveLength(1);
+			expect(parsed.results[0]?.slug).toBe('svelte/overview');
+			expect(parsed.results[0]?.status).toBe('ACCURATE');
+		});
+	});
+
+	describe('verification status', () => {
+		// Expected status paired with a sample verdict; each pair becomes its own test case.
+		const cases: Array<[VerificationResult['status'], VerificationResult]> = [
+			[
+				'ACCURATE',
+				{
+					slug: 'svelte/overview',
+					status: 'ACCURATE',
+					reasoning: 'Core concepts preserved, minor simplifications acceptable',
+				},
+			],
+			[
+				'NOT_ACCURATE',
+				{
+					slug: 'svelte/$state',
+					status: 'NOT_ACCURATE',
+					reasoning: 'Factual error in code example',
+				},
+			],
+		];
+
+		for (const [expected_status, result] of cases) {
+			it(`should mark summaries as ${expected_status} when appropriate`, () => {
+				expect(result.status).toBe(expected_status);
+				expect(result.reasoning).toBeTruthy();
+			});
+		}
+	});
+
+	describe('batch processing', () => {
+		it('should handle multiple verification results', () => {
+			// slug, status, reasoning for each verdict in the batch
+			const rows: Array<[string, VerificationResult['status'], string]> = [
+				['svelte/overview', 'ACCURATE', 'Good'],
+				['svelte/$state', 'NOT_ACCURATE', 'Missing info'],
+				['svelte/$effect', 'ACCURATE', 'Well done'],
+				['svelte/$derived', 'ACCURATE', 'Correct'],
+			];
+			const results = rows.map(
+				([slug, status, reasoning]): VerificationResult => ({ slug, status, reasoning }),
+			);
+
+			const output = create_verification_output(results);
+
+			expect(output.results).toHaveLength(4);
+			expect(output.accurate_count).toBe(3);
+			expect(output.not_accurate_count).toBe(1);
+		});
+
+		it('should calculate percentages correctly', () => {
+			// Ten sections: the first seven ACCURATE, the remaining three NOT_ACCURATE.
+			const results: VerificationResult[] = [];
+			for (let i = 0; i < 10; i += 1) {
+				results.push({
+					slug: `section-${i}`,
+					status: i < 7 ? 'ACCURATE' : 'NOT_ACCURATE',
+					reasoning: 'test',
+				});
+			}
+
+			const output = create_verification_output(results);
+
+			expect(output.verified_sections).toBe(10);
+			expect(output.accurate_count).toBe(7);
+			expect(output.not_accurate_count).toBe(3);
+
+			// 70% accurate, 30% not accurate
+			const as_percentage = (count: number): number => (count / output.verified_sections) * 100;
+
+			expect(as_percentage(output.accurate_count)).toBe(70);
+			expect(as_percentage(output.not_accurate_count)).toBe(30);
+		});
+	});
+
+	describe('edge cases', () => {
+		// Builds section-1..section-N entries that all share one status.
+		const uniform_results = (
+			status: VerificationResult['status'],
+			reasonings: string[],
+		): VerificationResult[] =>
+			reasonings.map((reasoning, index) => ({
+				slug: `section-${index + 1}`,
+				status,
+				reasoning,
+			}));
+
+		it('should handle empty results array', () => {
+			const output = create_verification_output([]);
+
+			expect(output.results).toHaveLength(0);
+			expect(output.accurate_count).toBe(0);
+			expect(output.not_accurate_count).toBe(0);
+		});
+
+		it('should handle all ACCURATE results', () => {
+			const { accurate_count, not_accurate_count } = create_verification_output(
+				uniform_results('ACCURATE', ['Good', 'Great']),
+			);
+
+			expect(accurate_count).toBe(2);
+			expect(not_accurate_count).toBe(0);
+		});
+
+		it('should handle all NOT_ACCURATE results', () => {
+			const { accurate_count, not_accurate_count } = create_verification_output(
+				uniform_results('NOT_ACCURATE', ['Error', 'Missing info']),
+			);
+
+			expect(accurate_count).toBe(0);
+			expect(not_accurate_count).toBe(2);
+		});
+	});
+
+	describe('reasoning validation', () => {
+		it('should include reasoning for each result', () => {
+			const results: VerificationResult[] = [
+				{ slug: 'svelte/overview', status: 'ACCURATE', reasoning: 'Core concepts preserved' },
+				{ slug: 'svelte/$state', status: 'NOT_ACCURATE', reasoning: 'Code example contains error' },
+			];
+
+			// Every verdict must carry a non-empty explanation.
+			results.forEach(({ reasoning }) => {
+				expect(reasoning).toBeTruthy();
+				expect(reasoning.length).toBeGreaterThan(0);
+			});
+		});
+
+		it('should handle long reasoning text', () => {
+			// 500 character reasoning
+			const long_reasoning = ''.padEnd(500, 'A');
+			const result: VerificationResult = {
+				slug: 'svelte/overview',
+				status: 'ACCURATE',
+				reasoning: long_reasoning,
+			};
+
+			expect(result.reasoning).toBe(long_reasoning);
+			expect(result.reasoning.length).toBe(500);
+		});
+	});
+});
diff --git a/packages/mcp-server/scripts/verify-distilled.ts b/packages/mcp-server/scripts/verify-distilled.ts
diff --git a/packages/mcp-server/src/distilled-verification.json b/packages/mcp-server/src/distilled-verification.json