Skip to content

Commit f27a777

Browse files
committed
wip
1 parent ff64389 commit f27a777

File tree

6 files changed

+1521
-1
lines changed

6 files changed

+1521
-1
lines changed

CLAUDE.md

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,41 @@ pnpm dev
3434
- `pnpm db:migrate` - Run migrations
3535
- `pnpm db:studio` - Open Drizzle Studio
3636

37+
### Documentation Generation Commands
38+
39+
#### Generate Use Case Summaries
40+
41+
Generate short descriptions of when each documentation section would be useful:
42+
43+
- `pnpm generate-summaries` - Generate use case summaries for all sections
44+
- `pnpm generate-summaries:dry-run` - Preview what would be generated without making API calls
45+
- `pnpm generate-summaries:debug` - Process only 2 sections for debugging
46+
47+
#### Generate Distilled Documentation
48+
49+
Generate condensed versions of the documentation to reduce context size:
50+
51+
- `pnpm generate-distilled` - Generate distilled versions for all sections
52+
- `pnpm generate-distilled:dry-run` - Preview what would be generated without making API calls
53+
- `pnpm generate-distilled:debug` - Process only 2 sections for debugging
54+
55+
#### Verify Distilled Documentation
56+
57+
Verify the accuracy of distilled summaries against original documentation:
58+
59+
- `pnpm verify-distilled` - Verify all distilled summaries for accuracy
60+
- `pnpm verify-distilled:dry-run` - Preview what would be verified without making API calls
61+
- `pnpm verify-distilled:debug` - Verify only 2 sections for debugging
62+
63+
The verification script:
64+
1. Loads `distilled.json` containing summaries and original content
65+
2. Uses the Anthropic Batch API to send each summary and original content to Claude
66+
3. Claude evaluates whether the summary is accurate or contains errors/omissions
67+
4. Generates `distilled-verification.json` with results (ACCURATE/NOT_ACCURATE) and reasoning
68+
5. Outputs statistics about accuracy rates
69+
70+
**Note:** All documentation generation and verification commands require that `ANTHROPIC_API_KEY` is set in `packages/mcp-server/.env`.
71+
3772
## Architecture
3873

3974
### MCP Server Implementation
@@ -85,6 +120,7 @@ Required environment variables:
85120

86121
- `DATABASE_URL`: SQLite database path (default: `file:test.db`)
87122
- `VOYAGE_API_KEY`: API key for embeddings support (optional)
123+
- `ANTHROPIC_API_KEY`: API key for documentation generation and verification (required when running the documentation generation and verification scripts)
88124

89125
When connected to the svelte-llm MCP server, you have access to comprehensive Svelte 5 and SvelteKit documentation. Here's how to use the available tools effectively:
90126

package.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@
2222
"generate-distilled": "pnpm --filter @sveltejs/mcp-server run generate-distilled",
2323
"generate-distilled:dry-run": "pnpm --filter @sveltejs/mcp-server run generate-distilled:dry-run",
2424
"generate-distilled:debug": "pnpm --filter @sveltejs/mcp-server run generate-distilled:debug",
25+
"verify-distilled": "pnpm --filter @sveltejs/mcp-server run verify-distilled",
26+
"verify-distilled:dry-run": "pnpm --filter @sveltejs/mcp-server run verify-distilled:dry-run",
27+
"verify-distilled:debug": "pnpm --filter @sveltejs/mcp-server run verify-distilled:debug",
2528
"generate-prompt-docs": "node --import node-resolve-ts/register scripts/update-docs-prompts.ts",
2629
"release": "pnpm --filter @sveltejs/mcp run build && changeset publish",
2730
"changeset:version": "changeset version && pnpm --filter @sveltejs/mcp run update:version && git add --all"

packages/mcp-server/package.json

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,10 @@
1515
"generate-summaries:debug": "DEBUG_MODE=1 node --import node-resolve-ts/register scripts/generate-summaries.ts",
1616
"generate-distilled": "node --import node-resolve-ts/register scripts/generate-summaries.ts --prompt-type distilled",
1717
"generate-distilled:dry-run": "node --import node-resolve-ts/register scripts/generate-summaries.ts --prompt-type distilled --dry-run",
18-
"generate-distilled:debug": "DEBUG_MODE=1 node --import node-resolve-ts/register scripts/generate-summaries.ts --prompt-type distilled"
18+
"generate-distilled:debug": "DEBUG_MODE=1 node --import node-resolve-ts/register scripts/generate-summaries.ts --prompt-type distilled",
19+
"verify-distilled": "node --import node-resolve-ts/register scripts/verify-distilled.ts",
20+
"verify-distilled:dry-run": "node --import node-resolve-ts/register scripts/verify-distilled.ts --dry-run",
21+
"verify-distilled:debug": "DEBUG_MODE=1 node --import node-resolve-ts/register scripts/verify-distilled.ts"
1922
},
2023
"exports": {
2124
".": "./src/index.ts",
Lines changed: 295 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,295 @@
1+
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
2+
import { writeFile, mkdir, rm } from 'node:fs/promises';
3+
import path from 'node:path';
4+
import { fileURLToPath } from 'node:url';
5+
6+
// Absolute filesystem path of this test module, converted from its file:// URL.
const current_filename = fileURLToPath(import.meta.url);
7+
// Directory that contains this test module.
const current_dirname = path.dirname(current_filename);
8+
// Scratch directory ('test-output', one level above this module's directory);
// the hooks below create it before each test and remove it afterwards.
const test_output_dir = path.join(current_dirname, '../test-output');
9+
// JSON file inside the scratch directory that the file-operations test writes and reads back.
const test_verification_path = path.join(test_output_dir, 'distilled-verification.json');
10+
11+
/** Verdict recorded for one section; these objects make up `VerificationOutput.results`. */
interface VerificationResult {
12+
/** Section identifier; the tests in this file use values such as 'svelte/overview'. */
slug: string;
13+
/** Outcome for the section; restricted to exactly these two literals. */
status: 'ACCURATE' | 'NOT_ACCURATE';
14+
/** Free-text explanation that accompanies the verdict. */
reasoning: string;
15+
}
16+
17+
/** Shape of the report object built by `create_verification_output` in this file. */
interface VerificationOutput {
18+
/** Creation timestamp; the helper in this file fills it with `new Date().toISOString()`. */
generated_at: string;
19+
/** Identifier of the model recorded in the report. */
model: string;
20+
/** Number of sections overall; the helper defaults it to `results.length`. */
total_sections: number;
21+
/** Number of entries in `results`. */
verified_sections: number;
22+
/** How many entries have status 'ACCURATE'. */
accurate_count: number;
23+
/** How many entries have status 'NOT_ACCURATE'. */
not_accurate_count: number;
24+
/** The individual per-section verdicts. */
results: VerificationResult[];
25+
}
26+
27+
/**
 * Builds a `VerificationOutput` report from a list of per-section verdicts.
 *
 * The verdicts are tallied in a single pass. The `results` array is stored in
 * the report by reference (it is not copied), and `generated_at` is set to the
 * current time as an ISO-8601 string.
 *
 * @param results - Verdicts to include in the report.
 * @param total_sections - Total number of sections; defaults to `results.length`.
 * @param model - Model identifier to record in the report; defaults to the
 *   identifier that was previously hard-coded, so existing callers are unaffected.
 * @returns The assembled report, including accurate / not-accurate counts.
 */
function create_verification_output(
	results: VerificationResult[],
	total_sections = results.length,
	model = 'claude-sonnet-4-5-20250929',
): VerificationOutput {
	let accurate_count = 0;
	let not_accurate_count = 0;

	// One pass over the verdicts instead of one filter per status.
	for (const { status } of results) {
		if (status === 'ACCURATE') {
			accurate_count += 1;
		} else if (status === 'NOT_ACCURATE') {
			not_accurate_count += 1;
		}
	}

	return {
		generated_at: new Date().toISOString(),
		model,
		total_sections,
		verified_sections: results.length,
		accurate_count,
		not_accurate_count,
		results,
	};
}
44+
45+
describe('verify-distilled', () => {
46+
// Guarantee that the scratch directory exists before every test runs.
beforeEach(async function ensure_output_dir() {
	await mkdir(test_output_dir, { recursive: true });
});
49+
50+
// Remove the scratch directory after every test. Cleanup is best-effort:
// a failed removal is deliberately ignored so it cannot fail the test itself.
afterEach(async function remove_output_dir() {
	await rm(test_output_dir, { recursive: true, force: true }).catch(() => {
		// Ignore cleanup errors
	});
});
57+
58+
// Checks the shape of the report object and its per-status tallies.
describe('verification result structure', () => {
	it('should create valid verification output structure', () => {
		const report = create_verification_output([
			{
				slug: 'svelte/overview',
				status: 'ACCURATE',
				reasoning: 'Summary accurately reflects original content',
			},
		]);

		// Every top-level field of the report must be present.
		const expected_keys = [
			'generated_at',
			'model',
			'total_sections',
			'verified_sections',
			'accurate_count',
			'not_accurate_count',
			'results',
		];
		for (const key of expected_keys) {
			expect(report).toHaveProperty(key);
		}
		expect(report.results).toHaveLength(1);
	});

	it('should correctly count accurate vs not accurate results', () => {
		const verdicts: VerificationResult[] = [
			{ slug: 'svelte/overview', status: 'ACCURATE', reasoning: 'Good summary' },
			{ slug: 'svelte/$state', status: 'NOT_ACCURATE', reasoning: 'Missing critical information' },
			{ slug: 'svelte/$effect', status: 'ACCURATE', reasoning: 'Well condensed' },
		];

		const { accurate_count, not_accurate_count, verified_sections } =
			create_verification_output(verdicts);

		expect(accurate_count).toBe(2);
		expect(not_accurate_count).toBe(1);
		expect(verified_sections).toBe(3);
	});
});
106+
107+
// Round-trips a report through the scratch JSON file on disk.
describe('file operations', () => {
	it('should write verification results to JSON file', async () => {
		const report = create_verification_output([
			{
				slug: 'svelte/overview',
				status: 'ACCURATE',
				reasoning: 'Summary is accurate',
			},
		]);

		const serialized = JSON.stringify(report, null, 2);
		await writeFile(test_verification_path, serialized, 'utf-8');

		// Read the file back and confirm the single entry survived the round trip.
		const { readFile } = await import('node:fs/promises');
		const raw_text = await readFile(test_verification_path, 'utf-8');
		const round_tripped = JSON.parse(raw_text);

		expect(round_tripped.results).toHaveLength(1);
		expect(round_tripped.results[0]?.slug).toBe('svelte/overview');
		expect(round_tripped.results[0]?.status).toBe('ACCURATE');
	});
});
131+
132+
// Confirms that both allowed status literals can be represented alongside a reasoning text.
describe('verification status', () => {
	it('should mark summaries as ACCURATE when appropriate', () => {
		const verdict: VerificationResult = {
			slug: 'svelte/overview',
			status: 'ACCURATE',
			reasoning: 'Core concepts preserved, minor simplifications acceptable',
		};
		const { status, reasoning } = verdict;

		expect(status).toBe('ACCURATE');
		expect(reasoning).toBeTruthy();
	});

	it('should mark summaries as NOT_ACCURATE when appropriate', () => {
		const verdict: VerificationResult = {
			slug: 'svelte/$state',
			status: 'NOT_ACCURATE',
			reasoning: 'Factual error in code example',
		};
		const { status, reasoning } = verdict;

		expect(status).toBe('NOT_ACCURATE');
		expect(reasoning).toBeTruthy();
	});
});
155+
156+
// Exercises the report helper with several verdicts at once.
describe('batch processing', () => {
	it('should handle multiple verification results', () => {
		const verdicts: VerificationResult[] = [
			{ slug: 'svelte/overview', status: 'ACCURATE', reasoning: 'Good' },
			{ slug: 'svelte/$state', status: 'NOT_ACCURATE', reasoning: 'Missing info' },
			{ slug: 'svelte/$effect', status: 'ACCURATE', reasoning: 'Well done' },
			{ slug: 'svelte/$derived', status: 'ACCURATE', reasoning: 'Correct' },
		];

		const report = create_verification_output(verdicts);

		expect(report.results).toHaveLength(4);
		expect(report.accurate_count).toBe(3);
		expect(report.not_accurate_count).toBe(1);
	});

	it('should calculate percentages correctly', () => {
		// Ten verdicts: the first seven ACCURATE, the remaining three NOT_ACCURATE.
		const verdicts: VerificationResult[] = [];
		for (let index = 0; index < 10; index += 1) {
			verdicts.push({
				slug: `section-${index}`,
				status: index < 7 ? 'ACCURATE' : 'NOT_ACCURATE',
				reasoning: 'test',
			});
		}

		const report = create_verification_output(verdicts);

		expect(report.verified_sections).toBe(10);
		expect(report.accurate_count).toBe(7);
		expect(report.not_accurate_count).toBe(3);

		// 70% accurate, 30% not accurate
		const share_of_verified = (count: number): number =>
			(count / report.verified_sections) * 100;

		expect(share_of_verified(report.accurate_count)).toBe(70);
		expect(share_of_verified(report.not_accurate_count)).toBe(30);
	});
});
209+
210+
// Boundary inputs: no verdicts at all, and verdicts that all share one status.
describe('edge cases', () => {
	it('should handle empty results array', () => {
		const report = create_verification_output([]);

		expect(report.results).toHaveLength(0);
		expect(report.accurate_count).toBe(0);
		expect(report.not_accurate_count).toBe(0);
	});

	it('should handle all ACCURATE results', () => {
		const verdicts: VerificationResult[] = [
			{ slug: 'section-1', status: 'ACCURATE', reasoning: 'Good' },
			{ slug: 'section-2', status: 'ACCURATE', reasoning: 'Great' },
		];

		const { accurate_count, not_accurate_count } = create_verification_output(verdicts);

		expect(accurate_count).toBe(2);
		expect(not_accurate_count).toBe(0);
	});

	it('should handle all NOT_ACCURATE results', () => {
		const verdicts: VerificationResult[] = [
			{ slug: 'section-1', status: 'NOT_ACCURATE', reasoning: 'Error' },
			{ slug: 'section-2', status: 'NOT_ACCURATE', reasoning: 'Missing info' },
		];

		const { accurate_count, not_accurate_count } = create_verification_output(verdicts);

		expect(accurate_count).toBe(0);
		expect(not_accurate_count).toBe(2);
	});
});
260+
261+
// Checks on the reasoning text carried by each verdict.
describe('reasoning validation', () => {
	it('should include reasoning for each result', () => {
		const verdicts: VerificationResult[] = [
			{ slug: 'svelte/overview', status: 'ACCURATE', reasoning: 'Core concepts preserved' },
			{ slug: 'svelte/$state', status: 'NOT_ACCURATE', reasoning: 'Code example contains error' },
		];

		for (const { reasoning } of verdicts) {
			expect(reasoning).toBeTruthy();
			expect(reasoning.length).toBeGreaterThan(0);
		}
	});

	it('should handle long reasoning text', () => {
		// 500 character reasoning
		const lengthy_text = 'A'.repeat(500);

		const verdict: VerificationResult = {
			slug: 'svelte/overview',
			status: 'ACCURATE',
			reasoning: lengthy_text,
		};

		expect(verdict.reasoning).toBe(lengthy_text);
		expect(verdict.reasoning.length).toBe(500);
	});
});
295+
});

0 commit comments

Comments
 (0)