prosdevlab · prosdev · Dec 7, 2025 · Dec 7, 2025 · Dec 7, 2025 · Dec 7, 2025
diff --git a/AGENTS.md b/AGENTS.md
@@ -26,10 +26,11 @@ The project is organized as a monorepo using pnpm workspaces:
 
 ```
 packages/
-├── cli/           # CLI entry point and MCP server implementation
-├── core/          # Shared types, interfaces, and core logic
-├── extract/       # Document extraction logic (AI integration)
-└── vector-store/  # Vector database interaction and semantic search
+├── cli/           # CLI entry point and MCP server
+├── core/          # Shared types and interfaces
+├── extract/       # Document extraction (Gemini, Ollama)
+├── storage/       # SQLite persistence (Drizzle ORM)
+└── vector-store/  # Vector database for semantic search
 ```
 
 ## Setup Commands

diff --git a/README.md b/README.md
@@ -25,7 +25,13 @@ npm install -g doc-agent
 doc extract invoice.pdf
 ```
 
-**With specific provider:**
+**With Ollama (local, privacy-first):**
+```bash
+# Ensure Ollama is running
+doc extract invoice.pdf --provider ollama
+```
+
+**With Gemini (cloud):**
 ```bash
 export GEMINI_API_KEY=your_key_here
 doc extract invoice.pdf --provider gemini

diff --git a/package.json b/package.json
@@ -48,6 +48,7 @@
     "@types/node": "^24.10.1",
     "@vitest/coverage-v8": "^4.0.15",
     "lint-staged": "16.2.7",
+    "tsup": "^8.5.1",
     "typescript": "^5.9.3",
     "vitest": "^4.0.15"
   }

diff --git a/packages/cli/README.md b/packages/cli/README.md
@@ -0,0 +1,46 @@
+# @doc-agent/cli
+
+Command-line interface and MCP server for document extraction.
+
+## Use cases
+
+- Extract data from documents via terminal
+- Integrate with Claude Desktop or Cursor via MCP
+- Batch process directories of documents (planned)
+
+## Commands
+
+| Command | Description |
+|---------|-------------|
+| `doc extract <file>` | Extract structured data from PDF/image |
+| `doc mcp` | Start MCP server for AI assistant integration |
+| `doc search <query>` | Search indexed documents (planned) |
+| `doc index <dir>` | Batch index directory (planned) |
+
+## Options
+
+```
+extract:
+  -p, --provider <provider>  AI provider: gemini, openai, ollama (default: ollama)
+  -m, --model <model>        Model name (default: llama3.2-vision)
+```
+
+## Environment variables
+
+| Variable | Required for |
+|----------|--------------|
+| `GEMINI_API_KEY` | `--provider gemini` |
+| `OPENAI_API_KEY` | `--provider openai` |
+
+## MCP tools
+
+Running `doc mcp` exposes the following tools:
+- `extract_document` — Extract data from a file path
+- `search_documents` — Search indexed documents (planned)
+
+## Depends on
+
+- `@doc-agent/core` — Types
+- `@doc-agent/extract` — Extraction logic
+- `@doc-agent/vector-store` — Search (planned)
+
diff --git a/packages/cli/package.json b/packages/cli/package.json
@@ -10,7 +10,7 @@
     "doc-agent": "./dist/cli.js"
   },
   "scripts": {
-    "build": "tsc",
+    "build": "tsup",
     "dev": "tsx src/cli.ts",
     "mcp": "tsx src/mcp/server.ts"
   },
@@ -23,7 +23,8 @@
     "chalk": "^5.6.2",
     "commander": "^14.0.2",
     "ora": "^9.0.0",
-    "vectordb": "^0.21.2"
+    "vectordb": "^0.21.2",
+    "zod": "^3.23.8"
   },
   "devDependencies": {
     "@types/node": "^24.10.1",

diff --git a/packages/cli/src/mcp/index.ts b/packages/cli/src/mcp/index.ts
@@ -1 +1,5 @@
-export { startMCPServer } from './server.js';
+// SDK re-exports (for consumers who need MCP types)
+export { McpServer, StdioServerTransport } from './sdk';
+
+// Server
+export { startMCPServer } from './server';
diff --git a/packages/cli/src/mcp/sdk.ts b/packages/cli/src/mcp/sdk.ts
@@ -0,0 +1,8 @@
+/**
+ * MCP SDK re-exports
+ * Barrel file to provide clean imports without .js extensions
+ */
+
+// Server
+export { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
+export { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
diff --git a/packages/cli/src/mcp/server.ts b/packages/cli/src/mcp/server.ts
@@ -1,123 +1,63 @@
 import type { Config } from '@doc-agent/core';
 import { extractDocument } from '@doc-agent/extract';
-import { Server } from '@modelcontextprotocol/sdk/server';
-import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
-import {
-  type CallToolRequest,
-  CallToolRequestSchema,
-  ListToolsRequestSchema,
-} from '@modelcontextprotocol/sdk/types.js';
+import { z } from 'zod';
+import { McpServer, StdioServerTransport } from './sdk';
 
-const server = new Server(
-  {
-    name: 'doc-agent',
-    version: '0.1.0',
-  },
-  {
-    capabilities: {
-      tools: {},
-    },
-  }
-);
-
-// List available tools
-server.setRequestHandler(ListToolsRequestSchema, async () => {
-  return {
-    tools: [
-      {
-        name: 'extract_document',
-        description: 'Extract structured data from invoice, receipt, or bank statement',
-        inputSchema: {
-          type: 'object',
-          properties: {
-            filepath: {
-              type: 'string',
-              description: 'Path to the document file',
-            },
-            provider: {
-              type: 'string',
-              enum: ['gemini', 'openai', 'ollama'],
-              description: 'AI provider to use',
-              default: 'gemini',
-            },
-          },
-          required: ['filepath'],
-        },
-      },
-      {
-        name: 'search_documents',
-        description: 'Search indexed documents using natural language',
-        inputSchema: {
-          type: 'object',
-          properties: {
-            query: {
-              type: 'string',
-              description: 'Search query in natural language',
-            },
-            limit: {
-              type: 'number',
-              description: 'Maximum number of results',
-              default: 10,
-            },
-          },
-          required: ['query'],
-        },
-      },
-    ],
-  };
+const server = new McpServer({
+  name: 'doc-agent',
+  version: '0.1.0',
 });
 
-// Handle tool calls
-server.setRequestHandler(CallToolRequestSchema, async (request: CallToolRequest) => {
-  if (request.params.name === 'extract_document') {
-    const { filepath, provider = 'gemini' } = request.params.arguments as {
-      filepath: string;
-      provider?: string;
-    };
-
+// Register extract_document tool
+server.registerTool(
+  'extract_document',
+  {
+    description: 'Extract structured data from invoice, receipt, or bank statement',
+    inputSchema: {
+      filepath: z.string().describe('Path to the document file'),
+      provider: z
+        .enum(['gemini', 'openai', 'ollama'])
+        .default('gemini')
+        .describe('AI provider to use'),
+    },
+  },
+  async ({ filepath, provider }) => {
     const config: Config = {
-      aiProvider: provider as 'gemini' | 'openai' | 'ollama',
+      aiProvider: provider,
       geminiApiKey: process.env.GEMINI_API_KEY,
       openaiApiKey: process.env.OPENAI_API_KEY,
     };
 
     try {
       const result = await extractDocument(filepath, config);
-
       return {
-        content: [
-          {
-            type: 'text',
-            text: JSON.stringify(result, null, 2),
-          },
-        ],
+        content: [{ type: 'text', text: JSON.stringify(result, null, 2) }],
       };
     } catch (error) {
       return {
-        content: [
-          {
-            type: 'text',
-            text: `Error: ${(error as Error).message}`,
-          },
-        ],
+        content: [{ type: 'text', text: `Error: ${(error as Error).message}` }],
         isError: true,
       };
     }
   }
+);
 
-  if (request.params.name === 'search_documents') {
+// Register search_documents tool
+server.registerTool(
+  'search_documents',
+  {
+    description: 'Search indexed documents using natural language',
+    inputSchema: {
+      query: z.string().describe('Search query in natural language'),
+      limit: z.number().default(10).describe('Maximum number of results'),
+    },
+  },
+  async () => {
     return {
-      content: [
-        {
-          type: 'text',
-          text: 'Search functionality not yet implemented',
-        },
-      ],
+      content: [{ type: 'text', text: 'Search functionality not yet implemented' }],
     };
   }
-
-  throw new Error(`Unknown tool: ${request.params.name}`);
-});
+);
 
 export async function startMCPServer() {
   const transport = new StdioServerTransport();

diff --git a/packages/cli/tsconfig.json b/packages/cli/tsconfig.json
@@ -5,6 +5,9 @@
     "rootDir": "./src"
   },
   "include": ["src/**/*"],
-  "exclude": ["node_modules", "dist"],
-  "references": [{ "path": "../core" }, { "path": "../extract" }, { "path": "../vector-store" }]
+  "exclude": ["node_modules", "dist", "**/*.test.ts"],
+  "references": [{ "path": "../core" }, { "path": "../extract" }, { "path": "../vector-store" }],
+  "ts-node": {
+    "esm": true
+  }
 }
diff --git a/packages/cli/tsup.config.ts b/packages/cli/tsup.config.ts
@@ -0,0 +1,27 @@
+import { defineConfig } from 'tsup';
+
+export default defineConfig({
+  entry: {
+    cli: 'src/cli.ts',
+    'mcp/server': 'src/mcp/server.ts',
+    'mcp/index': 'src/mcp/index.ts',
+  },
+  format: ['esm'],
+  dts: false,
+  sourcemap: true,
+  clean: true,
+  splitting: false,
+  treeshake: true,
+  external: [
+    '@doc-agent/core',
+    '@doc-agent/extract',
+    '@doc-agent/vector-store',
+    '@google/generative-ai',
+    '@modelcontextprotocol/sdk',
+    'chalk',
+    'commander',
+    'ora',
+    'vectordb',
+  ],
+  tsconfig: './tsconfig.json',
+});
diff --git a/packages/core/README.md b/packages/core/README.md
@@ -0,0 +1,15 @@
+# @doc-agent/core
+
+Shared TypeScript types for document extraction and search.
+
+## Types
+
+- `DocumentData` — Extracted document (invoice, receipt, bank statement)
+- `LineItem` — Individual line item with description, quantity, price
+- `SearchResult` — Vector search result with similarity score
+- `Config` — Provider configuration (gemini, openai, ollama)
+
+## Used by
+
+All other packages depend on these types.
+
diff --git a/packages/core/package.json b/packages/core/package.json
@@ -14,8 +14,8 @@
     }
   },
   "scripts": {
-    "build": "tsc",
-    "dev": "tsc --watch",
+    "build": "tsup",
+    "dev": "tsup --watch",
     "clean": "rm -rf dist",
     "typecheck": "tsc --noEmit",
     "lint": "biome lint ./src",

diff --git a/packages/core/tsup.config.ts b/packages/core/tsup.config.ts
@@ -0,0 +1,12 @@
+import { defineConfig } from 'tsup';
+
+export default defineConfig({
+  entry: ['src/index.ts'],
+  format: ['esm'],
+  dts: true,
+  sourcemap: true,
+  clean: true,
+  splitting: false,
+  treeshake: true,
+  tsconfig: './tsconfig.json',
+});
diff --git a/packages/extract/README.md b/packages/extract/README.md
@@ -0,0 +1,35 @@
+# @doc-agent/extract
+
+Extract structured data from invoices, receipts, and bank statements using Vision AI.
+
+## Use cases
+
+- Parse PDF or image documents into structured JSON
+- Extract vendor, amount, date, line items from financial documents
+- Local, privacy-first extraction without cloud APIs (Ollama)
+
+## Exports
+
+- `extractDocument(path, config)` — Main extraction function
+- `getMimeType(path)` — Detect MIME type from file extension
+
+## Providers
+
+| Provider | Requires | Best for |
+|----------|----------|----------|
+| `gemini` | `GEMINI_API_KEY` env var | Accuracy, speed |
+| `ollama` | Local Ollama + `llama3.2-vision` | Privacy, offline |
+
+## Depends on
+
+- `@doc-agent/core` — DocumentData, Config types
+
+## Used by
+
+- `@doc-agent/cli` — extract command, MCP server
+
+## Gotchas
+
+- Ollama retries once on Zod validation failure (LLM output can be inconsistent)
+- PDF support requires a vision-capable model
+