diff --git a/AGENTS.md b/AGENTS.md index 932af39..c2757cb 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -26,10 +26,11 @@ The project is organized as a monorepo using pnpm workspaces: ``` packages/ -├── cli/ # CLI entry point and MCP server implementation -├── core/ # Shared types, interfaces, and core logic -├── extract/ # Document extraction logic (AI integration) -└── vector-store/ # Vector database interaction and semantic search +├── cli/ # CLI entry point and MCP server +├── core/ # Shared types and interfaces +├── extract/ # Document extraction (Gemini, Ollama) +├── storage/ # SQLite persistence (Drizzle ORM) +└── vector-store/ # Vector database for semantic search ``` ## Setup Commands diff --git a/README.md b/README.md index 7c502bf..f04566d 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,13 @@ npm install -g doc-agent doc extract invoice.pdf ``` -**With specific provider:** +**With Ollama (local, privacy-first):** +```bash +# Ensure Ollama is running +doc extract invoice.pdf --provider ollama +``` + +**With Gemini (cloud):** ```bash export GEMINI_API_KEY=your_key_here doc extract invoice.pdf --provider gemini diff --git a/package.json b/package.json index e407f80..dfb5aaa 100644 --- a/package.json +++ b/package.json @@ -48,6 +48,7 @@ "@types/node": "^24.10.1", "@vitest/coverage-v8": "^4.0.15", "lint-staged": "16.2.7", + "tsup": "^8.5.1", "typescript": "^5.9.3", "vitest": "^4.0.15" } diff --git a/packages/cli/README.md b/packages/cli/README.md new file mode 100644 index 0000000..8706bc5 --- /dev/null +++ b/packages/cli/README.md @@ -0,0 +1,46 @@ +# @doc-agent/cli + +Command-line interface and MCP server for document extraction. + +## Use cases + +- Extract data from documents via terminal +- Integrate with Claude Desktop or Cursor via MCP +- Batch process directories of documents (planned) + +## Commands + +| Command | Description | +|---------|-------------| +| `doc extract ` | Extract structured data from PDF/image | +| `doc mcp` | Start MCP server for AI assistant integration | +| `doc search ` | Search indexed documents (planned) | +| `doc index ` | Batch index directory (planned) | + +## Options + +``` +extract: + -p, --provider AI provider: gemini, openai, ollama (default: ollama) + -m, --model Model name (default: llama3.2-vision) +``` + +## Environment variables + +| Variable | Required for | +|----------|--------------| +| `GEMINI_API_KEY` | `--provider gemini` | +| `OPENAI_API_KEY` | `--provider openai` | + +## MCP tools + +When running `doc mcp`, exposes: +- `extract_document` — Extract data from a file path +- `search_documents` — Search indexed documents (planned) + +## Depends on + +- `@doc-agent/core` — Types +- `@doc-agent/extract` — Extraction logic +- `@doc-agent/vector-store` — Search (planned) + diff --git a/packages/cli/package.json b/packages/cli/package.json index 06446f1..6c2a06e 100644 --- a/packages/cli/package.json +++ b/packages/cli/package.json @@ -10,7 +10,7 @@ "doc-agent": "./dist/cli.js" }, "scripts": { - "build": "tsc", + "build": "tsup", "dev": "tsx src/cli.ts", "mcp": "tsx src/mcp/server.ts" }, @@ -23,7 +23,8 @@ "chalk": "^5.6.2", "commander": "^14.0.2", "ora": "^9.0.0", - "vectordb": "^0.21.2" + "vectordb": "^0.21.2", + "zod": "^3.23.8" }, "devDependencies": { "@types/node": "^24.10.1", diff --git a/packages/cli/src/mcp/index.ts b/packages/cli/src/mcp/index.ts index 15a4170..ea08c29 100644 --- a/packages/cli/src/mcp/index.ts +++ b/packages/cli/src/mcp/index.ts @@ -1 +1,5 @@ -export { startMCPServer } from './server.js'; +// Server + +// SDK re-exports (for consumers who need MCP types) +export { McpServer, StdioServerTransport } from './sdk'; +export { startMCPServer } from './server'; diff --git a/packages/cli/src/mcp/sdk.ts b/packages/cli/src/mcp/sdk.ts new file mode 100644 index 0000000..c4948a3 --- /dev/null +++ b/packages/cli/src/mcp/sdk.ts @@ -0,0 +1,8 @@ +/** + * MCP SDK re-exports + * Barrel file to provide clean imports without .js extensions + */ + +// Server +export { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; +export { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'; diff --git a/packages/cli/src/mcp/server.ts b/packages/cli/src/mcp/server.ts index 328a3c8..8992734 100644 --- a/packages/cli/src/mcp/server.ts +++ b/packages/cli/src/mcp/server.ts @@ -1,123 +1,63 @@ import type { Config } from '@doc-agent/core'; import { extractDocument } from '@doc-agent/extract'; -import { Server } from '@modelcontextprotocol/sdk/server'; -import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'; -import { - type CallToolRequest, - CallToolRequestSchema, - ListToolsRequestSchema, -} from '@modelcontextprotocol/sdk/types.js'; +import { z } from 'zod'; +import { McpServer, StdioServerTransport } from './sdk'; -const server = new Server( - { - name: 'doc-agent', - version: '0.1.0', - }, - { - capabilities: { - tools: {}, - }, - } -); - -// List available tools -server.setRequestHandler(ListToolsRequestSchema, async () => { - return { - tools: [ - { - name: 'extract_document', - description: 'Extract structured data from invoice, receipt, or bank statement', - inputSchema: { - type: 'object', - properties: { - filepath: { - type: 'string', - description: 'Path to the document file', - }, - provider: { - type: 'string', - enum: ['gemini', 'openai', 'ollama'], - description: 'AI provider to use', - default: 'gemini', - }, - }, - required: ['filepath'], - }, - }, - { - name: 'search_documents', - description: 'Search indexed documents using natural language', - inputSchema: { - type: 'object', - properties: { - query: { - type: 'string', - description: 'Search query in natural language', - }, - limit: { - type: 'number', - description: 'Maximum number of results', - default: 10, - }, - }, - required: ['query'], - }, - }, - ], - }; +const server = new McpServer({ + name: 'doc-agent', + version: '0.1.0', }); -// Handle tool calls -server.setRequestHandler(CallToolRequestSchema, async (request: CallToolRequest) => { - if (request.params.name === 'extract_document') { - const { filepath, provider = 'gemini' } = request.params.arguments as { - filepath: string; - provider?: string; - }; - +// Register extract_document tool +server.registerTool( + 'extract_document', + { + description: 'Extract structured data from invoice, receipt, or bank statement', + inputSchema: { + filepath: z.string().describe('Path to the document file'), + provider: z + .enum(['gemini', 'openai', 'ollama']) + .default('gemini') + .describe('AI provider to use'), + }, + }, + async ({ filepath, provider }) => { const config: Config = { - aiProvider: provider as 'gemini' | 'openai' | 'ollama', + aiProvider: provider, geminiApiKey: process.env.GEMINI_API_KEY, openaiApiKey: process.env.OPENAI_API_KEY, }; try { const result = await extractDocument(filepath, config); - return { - content: [ - { - type: 'text', - text: JSON.stringify(result, null, 2), - }, - ], + content: [{ type: 'text', text: JSON.stringify(result, null, 2) }], }; } catch (error) { return { - content: [ - { - type: 'text', - text: `Error: ${(error as Error).message}`, - }, - ], + content: [{ type: 'text', text: `Error: ${(error as Error).message}` }], isError: true, }; } } +); - if (request.params.name === 'search_documents') { +// Register search_documents tool +server.registerTool( + 'search_documents', + { + description: 'Search indexed documents using natural language', + inputSchema: { + query: z.string().describe('Search query in natural language'), + limit: z.number().default(10).describe('Maximum number of results'), + }, + }, + async () => { return { - content: [ - { - type: 'text', - text: 'Search functionality not yet implemented', - }, - ], + content: [{ type: 'text', text: 'Search functionality not yet implemented' }], }; } - - throw new Error(`Unknown tool: ${request.params.name}`); -}); +); export async function startMCPServer() { const transport = new StdioServerTransport(); diff --git a/packages/cli/tsconfig.json b/packages/cli/tsconfig.json index 8e1fa55..a341f5b 100644 --- a/packages/cli/tsconfig.json +++ b/packages/cli/tsconfig.json @@ -5,6 +5,9 @@ "rootDir": "./src" }, "include": ["src/**/*"], - "exclude": ["node_modules", "dist"], - "references": [{ "path": "../core" }, { "path": "../extract" }, { "path": "../vector-store" }] + "exclude": ["node_modules", "dist", "**/*.test.ts"], + "references": [{ "path": "../core" }, { "path": "../extract" }, { "path": "../vector-store" }], + "ts-node": { + "esm": true + } } diff --git a/packages/cli/tsup.config.ts b/packages/cli/tsup.config.ts new file mode 100644 index 0000000..0d8c282 --- /dev/null +++ b/packages/cli/tsup.config.ts @@ -0,0 +1,27 @@ +import { defineConfig } from 'tsup'; + +export default defineConfig({ + entry: { + cli: 'src/cli.ts', + 'mcp/server': 'src/mcp/server.ts', + 'mcp/index': 'src/mcp/index.ts', + }, + format: ['esm'], + dts: false, + sourcemap: true, + clean: true, + splitting: false, + treeshake: true, + external: [ + '@doc-agent/core', + '@doc-agent/extract', + '@doc-agent/vector-store', + '@google/generative-ai', + '@modelcontextprotocol/sdk', + 'chalk', + 'commander', + 'ora', + 'vectordb', + ], + tsconfig: './tsconfig.json', +}); diff --git a/packages/core/README.md b/packages/core/README.md new file mode 100644 index 0000000..80182e6 --- /dev/null +++ b/packages/core/README.md @@ -0,0 +1,15 @@ +# @doc-agent/core + +Shared TypeScript types for document extraction and search. + +## Types + +- `DocumentData` — Extracted document (invoice, receipt, bank statement) +- `LineItem` — Individual line item with description, quantity, price +- `SearchResult` — Vector search result with similarity score +- `Config` — Provider configuration (gemini, openai, ollama) + +## Used by + +All other packages depend on these types. + diff --git a/packages/core/package.json b/packages/core/package.json index bedac96..a311a16 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -14,8 +14,8 @@ } }, "scripts": { - "build": "tsc", - "dev": "tsc --watch", + "build": "tsup", + "dev": "tsup --watch", "clean": "rm -rf dist", "typecheck": "tsc --noEmit", "lint": "biome lint ./src", diff --git a/packages/core/tsup.config.ts b/packages/core/tsup.config.ts new file mode 100644 index 0000000..da701cb --- /dev/null +++ b/packages/core/tsup.config.ts @@ -0,0 +1,12 @@ +import { defineConfig } from 'tsup'; + +export default defineConfig({ + entry: ['src/index.ts'], + format: ['esm'], + dts: true, + sourcemap: true, + clean: true, + splitting: false, + treeshake: true, + tsconfig: './tsconfig.json', +}); diff --git a/packages/extract/README.md b/packages/extract/README.md new file mode 100644 index 0000000..a4a26b8 --- /dev/null +++ b/packages/extract/README.md @@ -0,0 +1,35 @@ +# @doc-agent/extract + +Extract structured data from invoices, receipts, and bank statements using Vision AI. + +## Use cases + +- Parse PDF or image documents into structured JSON +- Extract vendor, amount, date, line items from financial documents +- Local privacy-first extraction without cloud APIs (Ollama) + +## Exports + +- `extractDocument(path, config)` — Main extraction function +- `getMimeType(path)` — Detect MIME type from file extension + +## Providers + +| Provider | Requires | Best for | +|----------|----------|----------| +| `gemini` | `GEMINI_API_KEY` env var | Accuracy, speed | +| `ollama` | Local Ollama + `llama3.2-vision` | Privacy, offline | + +## Depends on + +- `@doc-agent/core` — DocumentData, Config types + +## Used by + +- `@doc-agent/cli` — extract command, MCP server + +## Gotchas + +- Ollama retries once on Zod validation failure (LLM output can be inconsistent) +- PDF support requires vision-capable model + diff --git a/packages/extract/package.json b/packages/extract/package.json index babe286..7874465 100644 --- a/packages/extract/package.json +++ b/packages/extract/package.json @@ -13,8 +13,8 @@ } }, "scripts": { - "build": "tsc", - "dev": "tsc --watch", + "build": "tsup", + "dev": "tsup --watch", "clean": "rm -rf dist", "typecheck": "tsc --noEmit", "lint": "biome lint ./src", @@ -24,7 +24,8 @@ }, "dependencies": { "@doc-agent/core": "workspace:*", - "@google/generative-ai": "^0.24.1" + "@google/generative-ai": "^0.24.1", + "zod": "^3.23.8" }, "devDependencies": { "@types/node": "^24.10.1", diff --git a/packages/extract/src/__tests__/extract.test.ts b/packages/extract/src/__tests__/extract.test.ts new file mode 100644 index 0000000..e4c6619 --- /dev/null +++ b/packages/extract/src/__tests__/extract.test.ts @@ -0,0 +1,22 @@ +import type { Config } from '@doc-agent/core'; +import { describe, expect, it } from 'vitest'; +import { extractDocument } from '../index'; + +describe('extractDocument', () => { + it('should throw error for unsupported provider', async () => { + const fs = await import('node:fs'); + const path = await import('node:path'); + const os = await import('node:os'); + const tmpDir = os.tmpdir(); + const testFile = path.join(tmpDir, 'test-unsupported.pdf'); + fs.writeFileSync(testFile, Buffer.from('test pdf content')); + + const config: Config = { + aiProvider: 'openai' as 'gemini' | 'openai' | 'ollama', + }; + + await expect(extractDocument(testFile, config)).rejects.toThrow('not yet implemented'); + + fs.unlinkSync(testFile); + }); +}); diff --git a/packages/extract/src/__tests__/gemini.test.ts b/packages/extract/src/__tests__/gemini.test.ts new file mode 100644 index 0000000..b208c35 --- /dev/null +++ b/packages/extract/src/__tests__/gemini.test.ts @@ -0,0 +1,110 @@ +import type { Config } from '@doc-agent/core'; +import { beforeEach, describe, expect, it, vi } from 'vitest'; + +// Mock GoogleGenerativeAI - use hoisted to avoid initialization issues +const mockGenerateContent = vi.hoisted(() => vi.fn()); + +vi.mock('@google/generative-ai', () => { + // Reference the hoisted mock from outer scope + return { + GoogleGenerativeAI: class { + getGenerativeModel() { + return { + generateContent: mockGenerateContent, + }; + } + }, + }; +}); + +import { extractDocument } from '../index'; + +describe('Gemini Extraction', () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + it('should extract document data from Gemini API', async () => { + mockGenerateContent.mockResolvedValueOnce({ + response: { + text: () => + JSON.stringify({ + type: 'invoice', + vendor: 'Test Company', + amount: 200.0, + date: '2025-12-07', + items: [{ description: 'Service', total: 200.0 }], + }), + }, + }); + + const fs = await import('node:fs'); + const path = await import('node:path'); + const os = await import('node:os'); + const tmpDir = os.tmpdir(); + const testFile = path.join(tmpDir, 'test-gemini.pdf'); + fs.writeFileSync(testFile, Buffer.from('test pdf content')); + + const config: Config = { + aiProvider: 'gemini', + geminiApiKey: 'test-api-key', + }; + + const result = await extractDocument(testFile, config); + + expect(result.type).toBe('invoice'); + expect(result.vendor).toBe('Test Company'); + expect(result.amount).toBe(200.0); + expect(result.id).toBeDefined(); + expect(result.filename).toBe('test-gemini.pdf'); + expect(result.extractedAt).toBeInstanceOf(Date); + expect(mockGenerateContent).toHaveBeenCalled(); + + fs.unlinkSync(testFile); + }); + + it('should handle JSON wrapped in markdown code blocks', async () => { + mockGenerateContent.mockResolvedValueOnce({ + response: { + text: () => '```json\n{"type": "receipt", "amount": 50.0}\n```', + }, + }); + + const fs = await import('node:fs'); + const path = await import('node:path'); + const os = await import('node:os'); + const tmpDir = os.tmpdir(); + const testFile = path.join(tmpDir, 'test-markdown.pdf'); + fs.writeFileSync(testFile, Buffer.from('test pdf content')); + + const config: Config = { + aiProvider: 'gemini', + geminiApiKey: 'test-api-key', + }; + + const result = await extractDocument(testFile, config); + + expect(result.type).toBe('receipt'); + expect(result.amount).toBe(50.0); + + fs.unlinkSync(testFile); + }); + + it('should throw error when API key is missing', async () => { + const fs = await import('node:fs'); + const path = await import('node:path'); + const os = await import('node:os'); + const tmpDir = os.tmpdir(); + const testFile = path.join(tmpDir, 'test-no-key.pdf'); + fs.writeFileSync(testFile, Buffer.from('test pdf content')); + + const config: Config = { + aiProvider: 'gemini', + // No geminiApiKey + }; + + await expect(extractDocument(testFile, config)).rejects.toThrow('Gemini API key required'); + + fs.unlinkSync(testFile); + }); +}); diff --git a/packages/extract/src/__tests__/mime-type.test.ts b/packages/extract/src/__tests__/mime-type.test.ts new file mode 100644 index 0000000..16472df --- /dev/null +++ b/packages/extract/src/__tests__/mime-type.test.ts @@ -0,0 +1,35 @@ +import { describe, expect, it } from 'vitest'; +import { getMimeType } from '../index'; + +describe('getMimeType', () => { + it('should detect PDF MIME type', () => { + expect(getMimeType('invoice.pdf')).toBe('application/pdf'); + expect(getMimeType('/path/to/document.PDF')).toBe('application/pdf'); + }); + + it('should detect PNG MIME type', () => { + expect(getMimeType('receipt.png')).toBe('image/png'); + expect(getMimeType('image.PNG')).toBe('image/png'); + }); + + it('should detect JPEG MIME type', () => { + expect(getMimeType('photo.jpg')).toBe('image/jpeg'); + expect(getMimeType('image.JPG')).toBe('image/jpeg'); + expect(getMimeType('photo.jpeg')).toBe('image/jpeg'); + expect(getMimeType('image.JPEG')).toBe('image/jpeg'); + }); + + it('should detect GIF MIME type', () => { + expect(getMimeType('animation.gif')).toBe('image/gif'); + }); + + it('should detect WebP MIME type', () => { + expect(getMimeType('image.webp')).toBe('image/webp'); + }); + + it('should default to PDF for unknown extensions', () => { + expect(getMimeType('document.txt')).toBe('application/pdf'); + expect(getMimeType('file.unknown')).toBe('application/pdf'); + expect(getMimeType('noextension')).toBe('application/pdf'); + }); +}); diff --git a/packages/extract/src/__tests__/ollama.test.ts b/packages/extract/src/__tests__/ollama.test.ts new file mode 100644 index 0000000..7abded8 --- /dev/null +++ b/packages/extract/src/__tests__/ollama.test.ts @@ -0,0 +1,296 @@ +import type { Config } from '@doc-agent/core'; +import { beforeEach, describe, expect, it, vi } from 'vitest'; +import { extractDocument } from '../index'; + +// Mock fetch globally +const mockFetch = vi.fn(); +global.fetch = mockFetch; + +describe('Ollama Extraction', () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + it('should extract document data from Ollama API', async () => { + const mockResponse = { + response: JSON.stringify({ + type: 'invoice', + vendor: 'Test Company', + amount: 100.5, + date: '2025-12-07', + items: [{ description: 'Test item', total: 100.5 }], + }), + }; + + mockFetch.mockResolvedValueOnce({ + ok: true, + json: async () => mockResponse, + }); + + // Create a temporary file for testing + const fs = await import('node:fs'); + const path = await import('node:path'); + const os = await import('node:os'); + const tmpDir = os.tmpdir(); + const testFile = path.join(tmpDir, 'test-invoice.pdf'); + + // Create a minimal PDF file (just for testing file reading) + fs.writeFileSync(testFile, Buffer.from('test pdf content')); + + const config: Config = { + aiProvider: 'ollama', + ollamaModel: 'llama3.2-vision', + }; + + const result = await extractDocument(testFile, config); + + expect(result.type).toBe('invoice'); + expect(result.vendor).toBe('Test Company'); + expect(result.amount).toBe(100.5); + expect(result.date).toBe('2025-12-07'); + expect(result.items).toHaveLength(1); + expect(result.id).toBeDefined(); + expect(result.filename).toBe('test-invoice.pdf'); + expect(result.extractedAt).toBeInstanceOf(Date); + + // Verify API call + expect(mockFetch).toHaveBeenCalledWith('http://localhost:11434/api/generate', { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + body: expect.stringContaining('llama3.2-vision'), + }); + + // Cleanup + fs.unlinkSync(testFile); + }); + + it('should retry once on Zod validation failure', async () => { + const invalidResponse = { + response: JSON.stringify({ + type: 'invalid_type', // Invalid type + vendor: 'Test Company', + }), + }; + + const validResponse = { + response: JSON.stringify({ + type: 'receipt', + vendor: 'Test Company', + amount: 50.0, + }), + }; + + // First call returns invalid data, second call returns valid data + mockFetch + .mockResolvedValueOnce({ + ok: true, + json: async () => invalidResponse, + }) + .mockResolvedValueOnce({ + ok: true, + json: async () => validResponse, + }); + + const fs = await import('node:fs'); + const path = await import('node:path'); + const os = await import('node:os'); + const tmpDir = os.tmpdir(); + const testFile = path.join(tmpDir, 'test-receipt.pdf'); + fs.writeFileSync(testFile, Buffer.from('test pdf content')); + + const config: Config = { + aiProvider: 'ollama', + ollamaModel: 'llama3.2-vision', + }; + + const result = await extractDocument(testFile, config); + + expect(result.type).toBe('receipt'); + expect(mockFetch).toHaveBeenCalledTimes(2); // Should retry once + + fs.unlinkSync(testFile); + }); + + it('should handle API errors', async () => { + mockFetch.mockResolvedValueOnce({ + ok: false, + status: 500, + text: async () => 'Internal Server Error', + }); + + const fs = await import('node:fs'); + const path = await import('node:path'); + const os = await import('node:os'); + const tmpDir = os.tmpdir(); + const testFile = path.join(tmpDir, 'test-error.pdf'); + fs.writeFileSync(testFile, Buffer.from('test pdf content')); + + const config: Config = { + aiProvider: 'ollama', + ollamaModel: 'llama3.2-vision', + }; + + await expect(extractDocument(testFile, config)).rejects.toThrow('Ollama API error'); + + fs.unlinkSync(testFile); + }); + + it('should handle JSON parse errors', async () => { + const invalidJsonResponse = { + response: 'not valid json {', + }; + + mockFetch.mockResolvedValueOnce({ + ok: true, + json: async () => invalidJsonResponse, + }); + + const fs = await import('node:fs'); + const path = await import('node:path'); + const os = await import('node:os'); + const tmpDir = os.tmpdir(); + const testFile = path.join(tmpDir, 'test-parse-error.pdf'); + fs.writeFileSync(testFile, Buffer.from('test pdf content')); + + const config: Config = { + aiProvider: 'ollama', + ollamaModel: 'llama3.2-vision', + }; + + await expect(extractDocument(testFile, config)).rejects.toThrow('Failed to parse JSON'); + + fs.unlinkSync(testFile); + }); + + it('should use default model if not specified', async () => { + const mockResponse = { + response: JSON.stringify({ + type: 'receipt', + }), + }; + + mockFetch.mockResolvedValueOnce({ + ok: true, + json: async () => mockResponse, + }); + + const fs = await import('node:fs'); + const path = await import('node:path'); + const os = await import('node:os'); + const tmpDir = os.tmpdir(); + const testFile = path.join(tmpDir, 'test-default.pdf'); + fs.writeFileSync(testFile, Buffer.from('test pdf content')); + + const config: Config = { + aiProvider: 'ollama', + // No ollamaModel specified + }; + + await extractDocument(testFile, config); + + const callBody = JSON.parse(mockFetch.mock.calls[0][1].body as string); + expect(callBody.model).toBe('llama3.2-vision'); // Default model + + fs.unlinkSync(testFile); + }); + + it('should not retry on non-Zod errors', async () => { + mockFetch.mockResolvedValueOnce({ + ok: false, + status: 500, + text: async () => 'Internal Server Error', + }); + + const fs = await import('node:fs'); + const path = await import('node:path'); + const os = await import('node:os'); + const tmpDir = os.tmpdir(); + const testFile = path.join(tmpDir, 'test-no-retry.pdf'); + fs.writeFileSync(testFile, Buffer.from('test pdf content')); + + const config: Config = { + aiProvider: 'ollama', + ollamaModel: 'llama3.2-vision', + }; + + await expect(extractDocument(testFile, config)).rejects.toThrow('Ollama API error'); + + // Should not retry on API errors + expect(mockFetch).toHaveBeenCalledTimes(1); + + fs.unlinkSync(testFile); + }); + + it('should handle different image MIME types', async () => { + const mockResponse = { + response: JSON.stringify({ + type: 'receipt', + vendor: 'Store', + }), + }; + + mockFetch.mockResolvedValueOnce({ + ok: true, + json: async () => mockResponse, + }); + + const fs = await import('node:fs'); + const path = await import('node:path'); + const os = await import('node:os'); + const tmpDir = os.tmpdir(); + const testFile = path.join(tmpDir, 'receipt.png'); + fs.writeFileSync(testFile, Buffer.from('test image content')); + + const config: Config = { + aiProvider: 'ollama', + ollamaModel: 'llama3.2-vision', + }; + + await extractDocument(testFile, config); + + const callBody = JSON.parse(mockFetch.mock.calls[0][1].body as string); + expect(callBody.prompt).toContain('image'); // Should detect image type + + fs.unlinkSync(testFile); + }); + + it('should handle retry exhaustion (Zod error persists)', async () => { + const invalidResponse = { + response: JSON.stringify({ + type: 'invalid_type', // Invalid type + }), + }; + + // Both calls return invalid data + mockFetch + .mockResolvedValueOnce({ + ok: true, + json: async () => invalidResponse, + }) + .mockResolvedValueOnce({ + ok: true, + json: async () => invalidResponse, + }); + + const fs = await import('node:fs'); + const path = await import('node:path'); + const os = await import('node:os'); + const tmpDir = os.tmpdir(); + const testFile = path.join(tmpDir, 'test-retry-exhausted.pdf'); + fs.writeFileSync(testFile, Buffer.from('test pdf content')); + + const config: Config = { + aiProvider: 'ollama', + ollamaModel: 'llama3.2-vision', + }; + + await expect(extractDocument(testFile, config)).rejects.toThrow(); + + // Should retry once, then fail + expect(mockFetch).toHaveBeenCalledTimes(2); + + fs.unlinkSync(testFile); + }); +}); diff --git a/packages/extract/src/index.ts b/packages/extract/src/index.ts index 6a0f5e0..963c432 100644 --- a/packages/extract/src/index.ts +++ b/packages/extract/src/index.ts @@ -1,6 +1,39 @@ import { readFileSync } from 'node:fs'; +import { extname } from 'node:path'; import type { Config, DocumentData } from '@doc-agent/core'; import { GoogleGenerativeAI } from '@google/generative-ai'; +import { z } from 'zod'; + +// Zod schema for DocumentData validation +const LineItemSchema = z.object({ + description: z.string(), + quantity: z.number().optional(), + unitPrice: z.number().optional(), + total: z.number().optional(), +}); + +const DocumentDataSchema = z.object({ + type: z.enum(['invoice', 'receipt', 'bank_statement', 'other']), + vendor: z.string().optional(), + amount: z.number().optional(), + date: z.string().optional(), + items: z.array(LineItemSchema).optional(), + rawText: z.string().optional(), +}); + +// Helper to detect MIME type from file extension +export function getMimeType(filePath: string): string { + const ext = extname(filePath).toLowerCase(); + const mimeTypes: Record = { + '.pdf': 'application/pdf', + '.png': 'image/png', + '.jpg': 'image/jpeg', + '.jpeg': 'image/jpeg', + '.gif': 'image/gif', + '.webp': 'image/webp', + }; + return mimeTypes[ext] || 'application/pdf'; +} export async function extractDocument(filePath: string, config: Config): Promise { const fileBuffer = readFileSync(filePath); @@ -10,6 +43,10 @@ export async function extractDocument(filePath: string, config: Config): Promise return extractWithGemini(filePath, base64, config); } + if (config.aiProvider === 'ollama') { + return extractWithOllama(filePath, base64, config); + } + throw new Error(`Provider ${config.aiProvider} not yet implemented`); } @@ -56,3 +93,83 @@ Only respond with valid JSON, no markdown formatting.`; ...extracted, }; } + +async function extractWithOllama( + filePath: string, + base64: string, + config: Config, + retryCount = 0 +): Promise { + const model = config.ollamaModel || 'llama3.2-vision'; + const mimeType = getMimeType(filePath); + + const systemPrompt = `You are a document extraction assistant. Extract structured data from invoices, receipts, and bank statements. + +CRITICAL: You must respond with ONLY valid JSON, no markdown, no code blocks, no explanations. Just the raw JSON object. + +Expected JSON format: +{ + "type": "invoice" | "receipt" | "bank_statement" | "other", + "vendor": "company or vendor name if available", + "amount": total_amount_as_number_if_available, + "date": "YYYY-MM-DD format if available", + "items": [{"description": "item description", "quantity": number, "unitPrice": number, "total": number}] +} + +All fields except "type" are optional. Only include fields you can confidently extract.`; + + const userPrompt = `Extract structured data from this ${mimeType.includes('image') ? 'image' : 'PDF'} document. Return only valid JSON.`; + + try { + const response = await fetch('http://localhost:11434/api/generate', { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + model, + prompt: userPrompt, + system: systemPrompt, + images: [base64], + stream: false, + format: 'json', + }), + }); + + if (!response.ok) { + const errorText = await response.text(); + throw new Error(`Ollama API error: ${response.status} ${errorText}`); + } + + const data = (await response.json()) as { response: string }; + let parsed: unknown; + + try { + // Clean up response (remove markdown code blocks if present) + const cleaned = data.response + .replace(/```json\n?/g, '') + .replace(/```\n?/g, '') + .trim(); + parsed = JSON.parse(cleaned); + } catch (_parseError) { + throw new Error(`Failed to parse JSON response: ${data.response}`); + } + + // Validate with Zod + const validated = DocumentDataSchema.parse(parsed); + + // Build complete DocumentData object + return { + id: crypto.randomUUID(), + filename: filePath.split('/').pop() || 'unknown', + extractedAt: new Date(), + ...validated, + }; + } catch (error) { + // Retry once on validation failure + if (retryCount === 0 && error instanceof z.ZodError) { + return extractWithOllama(filePath, base64, config, 1); + } + throw error; + } +} diff --git a/packages/extract/tsconfig.json b/packages/extract/tsconfig.json index 2b66c79..e95071d 100644 --- a/packages/extract/tsconfig.json +++ b/packages/extract/tsconfig.json @@ -2,7 +2,8 @@ "extends": "../../tsconfig.json", "compilerOptions": { "outDir": "./dist", - "rootDir": "./src" + "rootDir": "./src", + "composite": true }, "include": ["src/**/*"], "exclude": ["node_modules", "dist", "**/*.test.ts"], diff --git a/packages/extract/tsup.config.ts b/packages/extract/tsup.config.ts new file mode 100644 index 0000000..8c55c62 --- /dev/null +++ b/packages/extract/tsup.config.ts @@ -0,0 +1,13 @@ +import { defineConfig } from 'tsup'; + +export default defineConfig({ + entry: ['src/index.ts'], + format: ['esm'], + dts: false, + sourcemap: true, + clean: true, + splitting: false, + treeshake: true, + external: ['@doc-agent/core'], + tsconfig: './tsconfig.json', +}); diff --git a/packages/storage/README.md b/packages/storage/README.md new file mode 100644 index 0000000..5182ddb --- /dev/null +++ b/packages/storage/README.md @@ -0,0 +1,36 @@ +# @doc-agent/storage + +SQLite persistence layer for extracted documents using Drizzle ORM. + +## Use cases + +- Store extracted document data locally +- Query documents by ID or list all +- Track document processing status (pending, indexed, failed) + +## Exports + +- `DocumentRepository` — Main repository class +- `storage` — Singleton instance +- `createDb(connectionString?)` — Database factory +- `getDbPath()` — Default database location +- `documents` — Drizzle schema table + +## Schema + +Documents table stores: +- `id`, `path`, `hash`, `status`, `data` (JSON), `createdAt` + +## Storage location + +Default: `~/.local/share/doc-agent/doc-agent.db` (via `env-paths`) + +## Depends on + +- `@doc-agent/core` — DocumentData type + +## Gotchas + +- Migrations run lazily on first DB access +- Uses better-sqlite3 (synchronous, embedded) + diff --git a/packages/storage/package.json b/packages/storage/package.json index 08c9f87..591f790 100644 --- a/packages/storage/package.json +++ b/packages/storage/package.json @@ -13,8 +13,8 @@ } }, "scripts": { - "build": "tsc", - "dev": "tsc --watch", + "build": "tsup", + "dev": "tsup --watch", "clean": "rm -rf dist", "typecheck": "tsc --noEmit", "lint": "biome lint ./src", diff --git a/packages/storage/src/__tests__/db.test.ts b/packages/storage/src/__tests__/db.test.ts index f062c6f..b3c2f7d 100644 --- a/packages/storage/src/__tests__/db.test.ts +++ b/packages/storage/src/__tests__/db.test.ts @@ -4,7 +4,7 @@ import * as path from 'node:path'; import Database from 'better-sqlite3'; import { drizzle } from 'drizzle-orm/better-sqlite3'; import { beforeEach, describe, expect, it, vi } from 'vitest'; -import { createDb, ensureDirectoryExists, getDbPath, runMigrations } from '../db.js'; +import { createDb, ensureDirectoryExists, getDbPath, runMigrations } from '../db'; // Mock the migrator module vi.mock('drizzle-orm/better-sqlite3/migrator', () => ({ diff --git a/packages/storage/src/__tests__/repository.test.ts b/packages/storage/src/__tests__/repository.test.ts index 1d3c5e6..8f6939f 100644 --- a/packages/storage/src/__tests__/repository.test.ts +++ b/packages/storage/src/__tests__/repository.test.ts @@ -1,7 +1,7 @@ import type { DocumentData } from '@doc-agent/core'; import { beforeEach, describe, expect, it } from 'vitest'; -import { createDb } from '../db.js'; -import { DocumentRepository } from '../index.js'; +import { createDb } from '../db'; +import { DocumentRepository } from '../index'; describe('DocumentRepository', () => { let repo: DocumentRepository; diff --git a/packages/storage/src/db.ts b/packages/storage/src/db.ts index 659e6fa..177bb99 100644 --- a/packages/storage/src/db.ts +++ b/packages/storage/src/db.ts @@ -6,7 +6,7 @@ import Database from 'better-sqlite3'; import { drizzle } from 'drizzle-orm/better-sqlite3'; import { migrate } from 'drizzle-orm/better-sqlite3/migrator'; import envPaths from 'env-paths'; -import * as schema from './schema.js'; +import * as schema from './schema'; const logger = kero.createLogger(); diff --git a/packages/storage/src/index.ts b/packages/storage/src/index.ts index 5793fbc..71e0720 100644 --- a/packages/storage/src/index.ts +++ b/packages/storage/src/index.ts @@ -1,7 +1,13 @@ import type { DocumentData } from '@doc-agent/core'; import { eq } from 'drizzle-orm'; -import { createDb, type DbClient } from './db.js'; -import { documents, type NewDocument } from './schema.js'; +import { createDb, type DbClient } from './db'; +import { documents, type NewDocument } from './schema'; + +// Re-export db utilities +export { createDb, type DbClient, getDbPath } from './db'; + +// Re-export schema types +export { type Document, documents, type NewDocument } from './schema'; export class DocumentRepository { private db: DbClient; diff --git a/packages/storage/tsconfig.json b/packages/storage/tsconfig.json index 90d76d7..e95071d 100644 --- a/packages/storage/tsconfig.json +++ b/packages/storage/tsconfig.json @@ -2,7 +2,10 @@ "extends": "../../tsconfig.json", "compilerOptions": { "outDir": "./dist", - "rootDir": "./src" + "rootDir": "./src", + "composite": true }, - "include": ["src/**/*"] + "include": ["src/**/*"], + "exclude": ["node_modules", "dist", "**/*.test.ts"], + "references": [{ "path": "../core" }] } diff --git a/packages/storage/tsup.config.ts b/packages/storage/tsup.config.ts new file mode 100644 index 0000000..8c55c62 --- /dev/null +++ b/packages/storage/tsup.config.ts @@ -0,0 +1,13 @@ +import { defineConfig } from 'tsup'; + +export default defineConfig({ + entry: ['src/index.ts'], + format: ['esm'], + dts: false, + sourcemap: true, + clean: true, + splitting: false, + treeshake: true, + external: ['@doc-agent/core'], + tsconfig: './tsconfig.json', +}); diff --git a/packages/vector-store/README.md b/packages/vector-store/README.md new file mode 100644 index 0000000..f328adb --- /dev/null +++ b/packages/vector-store/README.md @@ -0,0 +1,23 @@ +# @doc-agent/vector-store + +Vector database for semantic search over documents. Uses LanceDB. + +## Status + +**Not yet implemented.** Stubbed for future development. + +## Planned use cases + +- Index extracted documents as embeddings +- Natural language search over document contents +- Find similar documents + +## Exports + +- `VectorStore` — Main class (stubbed) +- `createVectorStore(config)` — Factory function + +## Depends on + +- `@doc-agent/core` — DocumentData, SearchResult, Config types + diff --git a/packages/vector-store/package.json b/packages/vector-store/package.json index 3a5fd32..6504171 100644 --- a/packages/vector-store/package.json +++ b/packages/vector-store/package.json @@ -13,8 +13,8 @@ } }, "scripts": { - "build": "tsc", - "dev": "tsc --watch", + "build": "tsup", + "dev": "tsup --watch", "clean": "rm -rf dist", "typecheck": "tsc --noEmit", "lint": "biome lint ./src", diff --git a/packages/vector-store/tsconfig.json b/packages/vector-store/tsconfig.json index b65ecf0..7134d2c 100644 --- a/packages/vector-store/tsconfig.json +++ b/packages/vector-store/tsconfig.json @@ -2,7 +2,8 @@ "extends": "../../tsconfig.json", "compilerOptions": { "outDir": "./dist", - "rootDir": "./src" + "rootDir": "./src", + "composite": true }, "include": ["src/**/*"], "exclude": ["node_modules", "dist", "**/*.test.ts"], diff --git a/packages/vector-store/tsup.config.ts b/packages/vector-store/tsup.config.ts new file mode 100644 index 0000000..908f1fd --- /dev/null +++ b/packages/vector-store/tsup.config.ts @@ -0,0 +1,13 @@ +import { defineConfig } from 'tsup'; + +export default defineConfig({ + entry: ['src/index.ts'], + format: ['esm'], + dts: false, + sourcemap: true, + clean: true, + splitting: false, + treeshake: true, + external: ['@doc-agent/core', '@doc-agent/extract'], + tsconfig: './tsconfig.json', +}); diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 12d3001..424206c 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -23,6 +23,9 @@ importers: lint-staged: specifier: 16.2.7 version: 16.2.7 + tsup: + specifier: ^8.5.1 + version: 8.5.1(postcss@8.5.6)(tsx@4.21.0)(typescript@5.9.3)(yaml@2.8.2) typescript: specifier: ^5.9.3 version: 5.9.3 @@ -46,7 +49,7 @@ importers: version: 0.24.1 '@modelcontextprotocol/sdk': specifier: ^1.24.3 - version: 1.24.3(zod@4.1.13) + version: 1.24.3(zod@3.25.76) chalk: specifier: ^5.6.2 version: 5.6.2 @@ -59,6 +62,9 @@ importers: vectordb: specifier: ^0.21.2 version: 0.21.2(@apache-arrow/ts@14.0.2)(apache-arrow@14.0.2) + zod: + specifier: ^3.23.8 + version: 3.25.76 devDependencies: '@types/node': specifier: ^24.10.1 @@ -84,6 +90,9 @@ importers: '@google/generative-ai': specifier: ^0.24.1 version: 0.24.1 + zod: + specifier: ^3.23.8 + version: 3.25.76 devDependencies: '@types/node': specifier: ^24.10.1 @@ -819,6 +828,9 @@ packages: resolution: {integrity: sha512-MqO+MLfM6kjxcKoy0p1wRzG3b4ZZXtPI+z2IE26UogS2Cm/XHO+7gGRBh6gcJsOiIVoH93UwKvW4HdgiOZCy9Q==} engines: {node: '>=18.0.0'} + '@jridgewell/gen-mapping@0.3.13': + resolution: {integrity: sha512-2kkt/7niJ6MgEPxF0bYdQ6etZaA+fQvDcLKckhy1yIQOzaoKjBBjSj63/aLVjYE3qhRt5dvM+uUyfCg6UKCBbA==} + '@jridgewell/resolve-uri@3.1.2': resolution: {integrity: sha512-bRISgCIjP20/tbWSPWMEi54QVPRZExkuD9lJL+UIxUKtwVJA8wW1Trb1jMs1RFXo1CBTNZ/5hpC9QvmKWdopKw==} engines: {node: '>=6.0.0'} @@ -1059,6 +1071,11 @@ packages: resolution: {integrity: sha512-5cvg6CtKwfgdmVqY1WIiXKc3Q1bkRqGLi+2W/6ao+6Y7gu/RCwRuAhGEzh5B4KlszSuTLgZYuqFqo5bImjNKng==} engines: {node: '>= 0.6'} + acorn@8.15.0: + resolution: {integrity: sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==} + engines: {node: '>=0.4.0'} + hasBin: true + ajv-formats@3.0.1: resolution: {integrity: sha512-8iUql50EUR+uUcdRQ3HDqa6EVyo3docL8g5WJ3FNcWmu62IbkGUue/pEyLBW8VGKKucTPgqeks4fIU1DA4yowQ==} peerDependencies: @@ -1086,6 +1103,9 @@ packages: resolution: {integrity: sha512-4Dj6M28JB+oAH8kFkTLUo+a2jwOFkuqb3yucU0CANcRRUbxS0cP0nZYCGjcc3BNXwRIsUVmDGgzawme7zvJHvg==} engines: {node: '>=12'} + any-promise@1.3.0: + resolution: {integrity: sha512-7UvmKalWRt1wgjL1RrGxoSJW/0QZFIegpeGvZG9kjp8vrRu55XTHbwnqq2GpXm9uLbcuhxm3IqX9OB4MZR1b2A==} + apache-arrow@14.0.2: resolution: {integrity: sha512-EBO2xJN36/XoY81nhLcwCJgFwkboDZeyNQ+OPsG7bCoQjc2BT0aTyH/MR6SrL+LirSNz+cYqjGRlupMMlP1aEg==} hasBin: true @@ -1137,10 +1157,20 @@ packages: buffer@5.7.1: resolution: {integrity: sha512-EHcyIPBQ4BSGlvjB16k5KgAJ27CIsHY/2JBmCRReo48y9rQ3MaUzWX3KVlBa4U7MyX02HdVj0K7C3WaB3ju7FQ==} + bundle-require@5.1.0: + resolution: {integrity: sha512-3WrrOuZiyaaZPWiEt4G3+IffISVC9HYlWueJEBWED4ZH4aIAC2PnkdnuRrR94M+w6yGWn4AglWtJtBI8YqvgoA==} + engines: {node: ^12.20.0 || ^14.13.1 || >=16.0.0} + peerDependencies: + esbuild: '>=0.18' + bytes@3.1.2: resolution: {integrity: sha512-/Nf7TyzTx6S3yRJObOAV7956r8cr2+Oj8AC5dt8wSP3BQAoeX58NoHyCU8P8zGkNXStjTSi6fzO6F0pBdcYbEg==} engines: {node: '>= 0.8'} + cac@6.7.14: + resolution: {integrity: sha512-b6Ilus+c3RrdDk+JhLKUAQfzzgLEPy6wcXqS7f/xe1EETvsDP6GORG7SFuOs6cID5YkqchW/LXZbX5bc8j7ZcQ==} + engines: {node: '>=8'} + call-bind-apply-helpers@1.0.2: resolution: {integrity: sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==} engines: {node: '>= 0.4'} @@ -1165,6 +1195,10 @@ packages: resolution: {integrity: sha512-7NzBL0rN6fMUW+f7A6Io4h40qQlG+xGmtMxfbnH/K7TAtt8JQWVQK+6g0UXKMeVJoyV5EkkNsErQ8pVD3bLHbA==} engines: {node: ^12.17.0 || ^14.13 || >=16.0.0} + chokidar@4.0.3: + resolution: {integrity: sha512-Qgzu8kfBvo+cA4962jnP1KkS6Dop5NS6g7R5LFYJr4b8Ub94PPQXUksCw9PvXoeXPRRddRNC5C1JQUR2SMGtnA==} + engines: {node: '>= 14.16.0'} + chownr@1.1.4: resolution: {integrity: sha512-jJ0bqzaylmJtVnNgzTeSOs8DPavpbYgEr/b0YL8/2GO3xJEhInFmhKMUnEJQjZumK7KXGFhUy89PrsJWlakBVg==} @@ -1206,6 +1240,17 @@ packages: resolution: {integrity: sha512-TywoWNNRbhoD0BXs1P3ZEScW8W5iKrnbithIl0YH+uCmBd0QpPOA8yc82DS3BIE5Ma6FnBVUsJ7wVUDz4dvOWQ==} engines: {node: '>=20'} + commander@4.1.1: + resolution: {integrity: sha512-NOKm8xhkzAjzFx8B2v5OAHT+u5pRQc2UCa2Vq9jYL/31o2wi9mxBA7LIFs3sV5VSC49z6pEhfbMULvShKj26WA==} + engines: {node: '>= 6'} + + confbox@0.1.8: + resolution: {integrity: sha512-RMtmw0iFkeR4YV+fUOSucriAQNb9g8zFR52MWCtl+cCZOFRNL6zeB395vPzFhEjjn4fMxXudmELnl/KF/WrK6w==} + + consola@3.4.2: + resolution: {integrity: sha512-5IKcdX0nnYavi6G7TtOhwkYzyjfJlatbjMjuLSfE2kYT5pMDOilZ4OvMhi637CcDICTmz3wARPoyhqyX1Y+XvA==} + engines: {node: ^14.18.0 || >=16.10.0} + content-disposition@1.0.1: resolution: {integrity: sha512-oIXISMynqSqm241k6kcQ5UwttDILMK4BiurCfGEREw6+X9jkkpEe5T9FZaApyLGGOnFuyMWZpdolTXMtvEJ08Q==} engines: {node: '>=18'} @@ -1493,6 +1538,9 @@ packages: resolution: {integrity: sha512-6Tb2myMioCAgv5kfvP5/PkZZ/ntTpVK39fHY7WkWBgvbeE+VHd/tZuZ4mrC+bxh4cfOZeYKVPaJIZtZXV7GNCQ==} engines: {node: '>=4.0.0'} + fix-dts-default-cjs-exports@1.0.1: + resolution: {integrity: sha512-pVIECanWFC61Hzl2+oOCtoJ3F17kglZC/6N94eRWycFgBH35hHx0Li604ZIzhseh97mf2p0cv7vVrOZGoqhlEg==} + flatbuffers@23.5.26: resolution: {integrity: sha512-vE+SI9vrJDwi1oETtTIFldC/o9GsVKRM+s6EL0nQgxXlYV1Vc4Tk30hj4xGICftInKQKj1F3up2n8UbIVobISQ==} @@ -1631,6 +1679,10 @@ packages: jose@6.1.3: resolution: {integrity: sha512-0TpaTfihd4QMNwrz/ob2Bp7X04yuxJkjRGi4aKmOqwhov54i6u79oCv7T+C7lo70MKH6BesI3vscD1yb/yzKXQ==} + joycon@3.1.1: + resolution: {integrity: sha512-34wB/Y7MW7bzjKRjUKTa46I2Z7eV62Rkhva+KkopW7Qvv/OSWBqvkSY7vusOPrNuZcUG3tApvdVgNB8POj3SPw==} + engines: {node: '>=10'} + js-tokens@9.0.1: resolution: {integrity: sha512-mxa9E9ITFOt0ban3j6L5MpjwegGz6lBQmM1IJkWeBZGcMxto50+eWdjC/52xDbS2vy0k7vIMK0Fe2wfL9OQSpQ==} @@ -1641,6 +1693,13 @@ packages: json-schema-traverse@1.0.0: resolution: {integrity: sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==} + lilconfig@3.1.3: + resolution: {integrity: sha512-/vlFKAoH5Cgt3Ie+JLhRbwOsCQePABiU3tJ1egGvyQ+33R/vcwM2Zl2QR/LzjsBeItPt3oSVXapn+m4nQDvpzw==} + engines: {node: '>=14'} + + lines-and-columns@1.2.4: + resolution: {integrity: sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg==} + lint-staged@16.2.7: resolution: {integrity: sha512-lDIj4RnYmK7/kXMya+qJsmkRFkGolciXjrsZ6PC25GdTfWOAWetR0ZbsNXRAj1EHHImRSalc+whZFg56F5DVow==} engines: {node: '>=20.17'} @@ -1650,6 +1709,10 @@ packages: resolution: {integrity: sha512-ME4Fb83LgEgwNw96RKNvKV4VTLuXfoKudAmm2lP8Kk87KaMK0/Xrx/aAkMWmT8mDb+3MlFDspfbCs7adjRxA2g==} engines: {node: '>=20.0.0'} + load-tsconfig@0.2.5: + resolution: {integrity: sha512-IXO6OCs9yg8tMKzfPZ1YmheJbZCiEsnBdcB03l0OcfK9prKnJb96siuHCr5Fl37/yo9DnKU+TLpxzTUspw9shg==} + engines: {node: ^12.20.0 || ^14.13.1 || >=16.0.0} + lodash.camelcase@4.3.0: resolution: {integrity: sha512-TwuEnCnxbc3rAvhf/LbG7tJUDzhqXyFnv3dtzLOPgCG/hODL7WFnsbwktkD7yUV0RrreP/l1PALq/YSg6VvjlA==} @@ -1720,9 +1783,15 @@ packages: mkdirp-classic@0.5.3: resolution: {integrity: sha512-gKLcREMhtuZRwRAfqP3RFW+TK4JqApVBtOIftVgjuABpAtpxhPGaDcfvbhNvD0B8iD1oUr/txX35NjcaY6Ns/A==} + mlly@1.8.0: + resolution: {integrity: sha512-l8D9ODSRWLe2KHJSifWGwBqpTZXIXTeo8mlKjY+E2HAakaTeNpqAyBZ8GSqLzHgw4XmHmC8whvpjJNMbFZN7/g==} + ms@2.1.3: resolution: {integrity: sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==} + mz@2.7.0: + resolution: {integrity: sha512-z81GNO7nnYMEhrGh9LeymoE4+Yr0Wn5McHIZMK5cfQCl+NDX08sCZgUc9/6MHni9IWuFLm1Z3HTCXu2z9fN62Q==} + nano-spawn@2.0.0: resolution: {integrity: sha512-tacvGzUY5o2D8CBh2rrwxyNojUsZNU2zjNTzKQrkgGJQTbGAfArVWXSKMBokBeeg6C7OLRGUEyoFlYbfeWQIqw==} engines: {node: '>=20.17'} @@ -1803,10 +1872,35 @@ packages: engines: {node: '>=0.10'} hasBin: true + pirates@4.0.7: + resolution: {integrity: sha512-TfySrs/5nm8fQJDcBDuUng3VOUKsd7S+zqvbOTiGXHfxX4wK31ard+hoNuvkicM/2YFzlpDgABOevKSsB4G/FA==} + engines: {node: '>= 6'} + pkce-challenge@5.0.1: resolution: {integrity: sha512-wQ0b/W4Fr01qtpHlqSqspcj3EhBvimsdh0KlHhH8HRZnMsEa0ea2fTULOXOS9ccQr3om+GcGRk4e+isrZWV8qQ==} engines: {node: '>=16.20.0'} + pkg-types@1.3.1: + resolution: {integrity: sha512-/Jm5M4RvtBFVkKWRu2BLUTNP8/M2a+UwuAX+ae4770q1qVGtfjG+WTCupoZixokjmHiry8uI+dlY8KXYV5HVVQ==} + + postcss-load-config@6.0.1: + resolution: {integrity: sha512-oPtTM4oerL+UXmx+93ytZVN82RrlY/wPUV8IeDxFrzIjXOLF1pN+EmKPLbubvKHT2HC20xXsCAH2Z+CKV6Oz/g==} + engines: {node: '>= 18'} + peerDependencies: + jiti: '>=1.21.0' + postcss: '>=8.0.9' + tsx: ^4.8.1 + yaml: ^2.4.2 + peerDependenciesMeta: + jiti: + optional: true + postcss: + optional: true + tsx: + optional: true + yaml: + optional: true + postcss@8.5.6: resolution: {integrity: sha512-3Ybi1tAuwAP9s0r1UQ2J4n5Y0G05bJkpUIO0/bI9MhwmD70S5aTWbXGBwxHrelT+XM1k6dM0pk+SwNkpTRN7Pg==} engines: {node: ^10 || ^12 || >=14} @@ -1846,6 +1940,10 @@ packages: resolution: {integrity: sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==} engines: {node: '>= 6'} + readdirp@4.1.2: + resolution: {integrity: sha512-GDhwkLfywWL2s6vEjyhri+eXmfH6j1L7JE27WhqLeYzoh/A3DBaYGEj2H/HFZCn/kMfim73FXxEJTw06WtxQwg==} + engines: {node: '>= 14.18.0'} + repeat-string@1.6.1: resolution: {integrity: sha512-PV0dzCYDNfRi1jCDbJzpW7jNNDRuCOG/jI5ctQcGKt/clZD+YcPS3yIlWuTJMmESC8aevCFmWJy5wjAFgNqN6w==} engines: {node: '>=0.10'} @@ -1854,6 +1952,10 @@ packages: resolution: {integrity: sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw==} engines: {node: '>=0.10.0'} + resolve-from@5.0.0: + resolution: {integrity: sha512-qYg9KP24dD5qka9J47d0aVky0N+b4fTU89LN9iDnjB5waksiC49rvMB0PrUJQGoTmH50XPiqOvAjDfaijGxYZw==} + engines: {node: '>=8'} + resolve-pkg-maps@1.0.0: resolution: {integrity: sha512-seS2Tj26TBVOC2NIc2rOe2y2ZO7efxITtLZcGSOnHHNOQ7CkiUBfw0Iw2ck6xkIhPwLhKNLS8BO+hEpngQlqzw==} @@ -1947,6 +2049,10 @@ packages: resolution: {integrity: sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==} engines: {node: '>=0.10.0'} + source-map@0.7.6: + resolution: {integrity: sha512-i5uvt8C3ikiWeNZSVZNWcfZPItFQOsYTUAOkcUPGd8DqDy1uOUikjt5dG+uRlwyvR108Fb9DOd4GvXfT0N2/uQ==} + engines: {node: '>= 12'} + stackback@0.0.2: resolution: {integrity: sha512-1XMJE5fQo1jGH6Y/7ebnwPOBEkIEnT4QF32d5R1+VXdXveM0IBMJt8zfaxX1P3QhVwrYe+576+jkANtSS2mBbw==} @@ -1988,6 +2094,11 @@ packages: resolution: {integrity: sha512-4gB8na07fecVVkOI6Rs4e7T6NOTki5EmL7TUduTs6bu3EdnSycntVJ4re8kgZA+wx9IueI2Y11bfbgwtzuE0KQ==} engines: {node: '>=0.10.0'} + sucrase@3.35.1: + resolution: {integrity: sha512-DhuTmvZWux4H1UOnWMB3sk0sbaCVOoQZjv8u1rDoTV0HTdGem9hkAZtl4JZy8P2z4Bg0nT+YMeOFyVr4zcG5Tw==} + engines: {node: '>=16 || 14 >=14.17'} + hasBin: true + supports-color@7.2.0: resolution: {integrity: sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==} engines: {node: '>=8'} @@ -2004,9 +2115,19 @@ packages: resolution: {integrity: sha512-ujeqbceABgwMZxEJnk2HDY2DlnUZ+9oEcb1KzTVfYHio0UE6dG71n60d8D2I4qNvleWrrXpmjpt7vZeF1LnMZQ==} engines: {node: '>=6'} + thenify-all@1.6.0: + resolution: {integrity: sha512-RNxQH/qI8/t3thXJDwcstUO4zeqo64+Uy/+sNVRBx4Xn2OX+OZ9oP+iJnNFqplFra2ZUVeKCSa2oVWi3T4uVmA==} + engines: {node: '>=0.8'} + + thenify@3.3.1: + resolution: {integrity: sha512-RVZSIV5IG10Hk3enotrhvz0T9em6cyHBLkH/YAZuKqd8hRkKhSfCGIcP2KUY0EPxndzANBmNllzWPwak+bheSw==} + tinybench@2.9.0: resolution: {integrity: sha512-0+DUvqWMValLmha6lr4kD8iAMK1HzV0/aKnCtWb9v9641TnP/MFb7Pc2bxoxQjTXAErryXVgUOfv2YqNllqGeg==} + tinyexec@0.3.2: + resolution: {integrity: sha512-KQQR9yN7R5+OSwaK0XQoj22pwHoTlgYqmUscPYoknOoWCWfj/5/ABTMRi69FrKU5ffPVh5QcFikpWJI/P1ocHA==} + tinyexec@1.0.2: resolution: {integrity: sha512-W/KYk+NFhkmsYpuHq5JykngiOCnxeVL8v8dFnqxSD8qEEdRfXk1SDM6JzNqcERbcGYj9tMrDQBYV9cjgnunFIg==} engines: {node: '>=18'} @@ -2027,9 +2148,35 @@ packages: resolution: {integrity: sha512-o5sSPKEkg/DIQNmH43V0/uerLrpzVedkUh8tGNvaeXpfpuwjKenlSox/2O/BTlZUtEe+JG7s5YhEz608PlAHRA==} engines: {node: '>=0.6'} + tree-kill@1.2.2: + resolution: {integrity: sha512-L0Orpi8qGpRG//Nd+H90vFB+3iHnue1zSSGmNOOCh1GLJ7rUKVwV2HvijphGQS2UmhUZewS9VgvxYIdgr+fG1A==} + hasBin: true + + ts-interface-checker@0.1.13: + resolution: {integrity: sha512-Y/arvbn+rrz3JCKl9C4kVNfTfSm2/mEp5FSz5EsZSANGPSlQrpRI5M4PKF+mJnE52jOO90PnPSc3Ur3bTQw0gA==} + tslib@2.8.1: resolution: {integrity: sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==} + tsup@8.5.1: + resolution: {integrity: sha512-xtgkqwdhpKWr3tKPmCkvYmS9xnQK3m3XgxZHwSUjvfTjp7YfXe5tT3GgWi0F2N+ZSMsOeWeZFh7ZZFg5iPhing==} + engines: {node: '>=18'} + hasBin: true + peerDependencies: + '@microsoft/api-extractor': ^7.36.0 + '@swc/core': ^1 + postcss: ^8.4.12 + typescript: '>=4.5.0' + peerDependenciesMeta: + '@microsoft/api-extractor': + optional: true + '@swc/core': + optional: true + postcss: + optional: true + typescript: + optional: true + tsx@4.21.0: resolution: {integrity: sha512-5C1sg4USs1lfG0GFb2RLXsdpXqBSEhAaA/0kPL01wxzpMqLILNxIxIOKiILz+cdg/pLnOUxFYOR5yhHU666wbw==} engines: {node: '>=18.0.0'} @@ -2055,6 +2202,9 @@ packages: resolution: {integrity: sha512-ya4mg/30vm+DOWfBg4YK3j2WD6TWtRkCbasOJr40CseYENzCUby/7rIvXA99JGsQHeNxLbnXdyLLxKSv3tauFw==} engines: {node: '>=12.17'} + ufo@1.6.1: + resolution: {integrity: sha512-9a4/uxlTWJ4+a5i0ooc1rU7C7YOw3wT+UGqdeNNHWnOF9qcMBgLRS+4IYUqbczewFx4mLEig6gawh7X6mFlEkA==} + undici-types@6.21.0: resolution: {integrity: sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==} @@ -2074,7 +2224,6 @@ packages: vectordb@0.21.2: resolution: {integrity: sha512-5tiwUq0jDtfIpcr7NY+kNCTecHCzSq0AqQtMzJphH7z6H6gfrw9t5/Aoy5/QnS0uAWIgqvCbE5qneQOFGxE+Og==} - cpu: [x64, arm64] os: [darwin, linux, win32] deprecated: Use @lancedb/lancedb instead. peerDependencies: @@ -2193,9 +2342,6 @@ packages: zod@3.25.76: resolution: {integrity: sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==} - zod@4.1.13: - resolution: {integrity: sha512-AvvthqfqrAhNH9dnfmrfKzX5upOdjUVJYFqNSlkmGf64gRaTzlPwz99IHYnVs28qYAybvAlBV+H7pn0saFY4Ig==} - snapshots: '@75lb/deep-merge@1.1.2': @@ -2571,6 +2717,11 @@ snapshots: '@google/generative-ai@0.24.1': {} + '@jridgewell/gen-mapping@0.3.13': + dependencies: + '@jridgewell/sourcemap-codec': 1.5.5 + '@jridgewell/trace-mapping': 0.3.31 + '@jridgewell/resolve-uri@3.1.2': {} '@jridgewell/sourcemap-codec@1.5.5': {} @@ -2597,7 +2748,7 @@ snapshots: '@lytics/kero@1.0.0': {} - '@modelcontextprotocol/sdk@1.24.3(zod@4.1.13)': + '@modelcontextprotocol/sdk@1.24.3(zod@3.25.76)': dependencies: ajv: 8.17.1 ajv-formats: 3.0.1(ajv@8.17.1) @@ -2611,8 +2762,8 @@ snapshots: jose: 6.1.3 pkce-challenge: 5.0.1 raw-body: 3.0.2 - zod: 4.1.13 - zod-to-json-schema: 3.25.0(zod@4.1.13) + zod: 3.25.76 + zod-to-json-schema: 3.25.0(zod@3.25.76) transitivePeerDependencies: - supports-color @@ -2778,6 +2929,8 @@ snapshots: mime-types: 3.0.2 negotiator: 1.0.0 + acorn@8.15.0: {} + ajv-formats@3.0.1(ajv@8.17.1): optionalDependencies: ajv: 8.17.1 @@ -2801,6 +2954,8 @@ snapshots: ansi-styles@6.2.3: {} + any-promise@1.3.0: {} + apache-arrow@14.0.2: dependencies: '@types/command-line-args': 5.2.0 @@ -2878,8 +3033,15 @@ snapshots: base64-js: 1.5.1 ieee754: 1.2.1 + bundle-require@5.1.0(esbuild@0.27.1): + dependencies: + esbuild: 0.27.1 + load-tsconfig: 0.2.5 + bytes@3.1.2: {} + cac@6.7.14: {} + call-bind-apply-helpers@1.0.2: dependencies: es-errors: 1.3.0 @@ -2903,6 +3065,10 @@ snapshots: chalk@5.6.2: {} + chokidar@4.0.3: + dependencies: + readdirp: 4.1.2 + chownr@1.1.4: {} cli-cursor@5.0.0: @@ -2944,6 +3110,12 @@ snapshots: commander@14.0.2: {} + commander@4.1.1: {} + + confbox@0.1.8: {} + + consola@3.4.2: {} + content-disposition@1.0.1: {} content-type@1.0.5: {} @@ -3232,6 +3404,12 @@ snapshots: dependencies: array-back: 3.1.0 + fix-dts-default-cjs-exports@1.0.1: + dependencies: + magic-string: 0.30.21 + mlly: 1.8.0 + rollup: 4.53.3 + flatbuffers@23.5.26: {} follow-redirects@1.15.11: {} @@ -3354,12 +3532,18 @@ snapshots: jose@6.1.3: {} + joycon@3.1.1: {} + js-tokens@9.0.1: {} json-bignum@0.0.3: {} json-schema-traverse@1.0.0: {} + lilconfig@3.1.3: {} + + lines-and-columns@1.2.4: {} + lint-staged@16.2.7: dependencies: commander: 14.0.2 @@ -3379,6 +3563,8 @@ snapshots: rfdc: 1.4.1 wrap-ansi: 9.0.2 + load-tsconfig@0.2.5: {} + lodash.camelcase@4.3.0: {} lodash@4.17.21: {} @@ -3441,8 +3627,21 @@ snapshots: mkdirp-classic@0.5.3: {} + mlly@1.8.0: + dependencies: + acorn: 8.15.0 + pathe: 2.0.3 + pkg-types: 1.3.1 + ufo: 1.6.1 + ms@2.1.3: {} + mz@2.7.0: + dependencies: + any-promise: 1.3.0 + object-assign: 4.1.1 + thenify-all: 1.6.0 + nano-spawn@2.0.0: {} nanoid@3.3.11: {} @@ -3505,8 +3704,24 @@ snapshots: pidtree@0.6.0: {} + pirates@4.0.7: {} + pkce-challenge@5.0.1: {} + pkg-types@1.3.1: + dependencies: + confbox: 0.1.8 + mlly: 1.8.0 + pathe: 2.0.3 + + postcss-load-config@6.0.1(postcss@8.5.6)(tsx@4.21.0)(yaml@2.8.2): + dependencies: + lilconfig: 3.1.3 + optionalDependencies: + postcss: 8.5.6 + tsx: 4.21.0 + yaml: 2.8.2 + postcss@8.5.6: dependencies: nanoid: 3.3.11 @@ -3566,10 +3781,14 @@ snapshots: string_decoder: 1.3.0 util-deprecate: 1.0.2 + readdirp@4.1.2: {} + repeat-string@1.6.1: {} require-from-string@2.0.2: {} + resolve-from@5.0.0: {} + resolve-pkg-maps@1.0.0: {} restore-cursor@5.1.0: @@ -3710,6 +3929,8 @@ snapshots: source-map@0.6.1: {} + source-map@0.7.6: {} + stackback@0.0.2: {} statuses@2.0.2: {} @@ -3743,6 +3964,16 @@ snapshots: strip-json-comments@2.0.1: {} + sucrase@3.35.1: + dependencies: + '@jridgewell/gen-mapping': 0.3.13 + commander: 4.1.1 + lines-and-columns: 1.2.4 + mz: 2.7.0 + pirates: 4.0.7 + tinyglobby: 0.2.15 + ts-interface-checker: 0.1.13 + supports-color@7.2.0: dependencies: has-flag: 4.0.0 @@ -3772,8 +4003,18 @@ snapshots: inherits: 2.0.4 readable-stream: 3.6.2 + thenify-all@1.6.0: + dependencies: + thenify: 3.3.1 + + thenify@3.3.1: + dependencies: + any-promise: 1.3.0 + tinybench@2.9.0: {} + tinyexec@0.3.2: {} + tinyexec@1.0.2: {} tinyglobby@0.2.15: @@ -3789,8 +4030,40 @@ snapshots: toidentifier@1.0.1: {} + tree-kill@1.2.2: {} + + ts-interface-checker@0.1.13: {} + tslib@2.8.1: {} + tsup@8.5.1(postcss@8.5.6)(tsx@4.21.0)(typescript@5.9.3)(yaml@2.8.2): + dependencies: + bundle-require: 5.1.0(esbuild@0.27.1) + cac: 6.7.14 + chokidar: 4.0.3 + consola: 3.4.2 + debug: 4.4.3 + esbuild: 0.27.1 + fix-dts-default-cjs-exports: 1.0.1 + joycon: 3.1.1 + picocolors: 1.1.1 + postcss-load-config: 6.0.1(postcss@8.5.6)(tsx@4.21.0)(yaml@2.8.2) + resolve-from: 5.0.0 + rollup: 4.53.3 + source-map: 0.7.6 + sucrase: 3.35.1 + tinyexec: 0.3.2 + tinyglobby: 0.2.15 + tree-kill: 1.2.2 + optionalDependencies: + postcss: 8.5.6 + typescript: 5.9.3 + transitivePeerDependencies: + - jiti + - supports-color + - tsx + - yaml + tsx@4.21.0: dependencies: esbuild: 0.27.1 @@ -3814,6 +4087,8 @@ snapshots: typical@7.3.0: {} + ufo@1.6.1: {} + undici-types@6.21.0: {} undici-types@7.16.0: {} @@ -3913,10 +4188,8 @@ snapshots: yoctocolors@2.1.2: {} - zod-to-json-schema@3.25.0(zod@4.1.13): + zod-to-json-schema@3.25.0(zod@3.25.76): dependencies: - zod: 4.1.13 + zod: 3.25.76 zod@3.25.76: {} - - zod@4.1.13: {} diff --git a/tsconfig.json b/tsconfig.json index aedd253..8c14324 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -4,8 +4,8 @@ "strict": true, "skipLibCheck": true, "forceConsistentCasingInFileNames": true, - "moduleResolution": "Node16", - "module": "NodeNext", + "moduleResolution": "bundler", + "module": "ESNext", "declaration": true, "composite": true, "declarationMap": true,