From 33cf7d17674afc77b12e1e3215ff30d6403763dd Mon Sep 17 00:00:00 2001
From: prosdev <pros.seng@contentstack.com>
Date: Mon, 8 Dec 2025 10:40:24 -0800
Subject: [PATCH 1/5] refactor(extract): split monolith into modules

- types.ts: StreamChunk, StreamCallback, ExtractOptions types
- schemas.ts: Zod schemas with flexible field mapping
- mime.ts: MIME type detection helper
- pdf.ts: PDF to image conversion (scale 3 for quality)
- ocr.ts: Tesseract.js OCR with progress callbacks
- providers/ollama.ts: Ollama extraction with kero logging
- providers/gemini.ts: Gemini extraction (unchanged)
- extract.ts: Main extraction orchestrator
- index.ts: Barrel export

Improves testability and maintainability.
---
 packages/extract/package.json                 |   1 +
 .../extract/src/__tests__/extract.test.ts     | 138 ++++++-
 packages/extract/src/__tests__/mime.test.ts   |  40 ++
 packages/extract/src/__tests__/ocr.test.ts    | 234 ++++++------
 packages/extract/src/__tests__/pdf.test.ts    |  89 +++++
 .../extract/src/__tests__/schemas.test.ts     | 132 +++++++
 packages/extract/src/extract.ts               |  35 ++
 packages/extract/src/index.ts                 | 356 ++----------------
 packages/extract/src/mime.ts                  |  23 ++
 packages/extract/src/ocr.ts                   |  72 ++++
 packages/extract/src/pdf.ts                   |  45 +++
 packages/extract/src/providers/gemini.ts      |  51 +++
 packages/extract/src/providers/index.ts       |   3 +
 packages/extract/src/providers/ollama.ts      | 259 +++++++++++++
 packages/extract/src/schemas.ts               | 101 +++++
 packages/extract/src/types.ts                 |  27 ++
 pnpm-lock.yaml                                |   4 +-
 vitest.config.ts                              |  11 +-
 18 files changed, 1152 insertions(+), 469 deletions(-)
 create mode 100644 packages/extract/src/__tests__/mime.test.ts
 create mode 100644 packages/extract/src/__tests__/pdf.test.ts
 create mode 100644 packages/extract/src/__tests__/schemas.test.ts
 create mode 100644 packages/extract/src/extract.ts
 create mode 100644 packages/extract/src/mime.ts
 create mode 100644 packages/extract/src/ocr.ts
 create mode 100644 packages/extract/src/pdf.ts
 create mode 100644 packages/extract/src/providers/gemini.ts
 create mode 100644 packages/extract/src/providers/index.ts
 create mode 100644 packages/extract/src/providers/ollama.ts
 create mode 100644 packages/extract/src/schemas.ts
 create mode 100644 packages/extract/src/types.ts

diff --git a/packages/extract/package.json b/packages/extract/package.json
index bb028a9..1181fe9 100644
--- a/packages/extract/package.json
+++ b/packages/extract/package.json
@@ -25,6 +25,7 @@
   "dependencies": {
     "@doc-agent/core": "workspace:*",
     "@google/generative-ai": "^0.24.1",
+    "@lytics/kero": "^1.0.0",
     "pdf-to-img": "^5.0.0",
     "tesseract.js": "^6.0.1",
     "zod": "^3.23.8"
diff --git a/packages/extract/src/__tests__/extract.test.ts b/packages/extract/src/__tests__/extract.test.ts
index e4c6619..9b6d956 100644
--- a/packages/extract/src/__tests__/extract.test.ts
+++ b/packages/extract/src/__tests__/extract.test.ts
@@ -1,22 +1,136 @@
+import { beforeEach, describe, expect, it, vi } from 'vitest';
+
+// Mock providers
+vi.mock('../providers/gemini', () => ({
+  extractWithGemini: vi.fn(),
+}));
+
+vi.mock('../providers/ollama', () => ({
+  extractWithOllama: vi.fn(),
+}));
+
+// Mock fs
+vi.mock('node:fs', () => ({
+  readFileSync: vi.fn(() => Buffer.from('fake-file-content')),
+}));
+
+// Mock kero
+vi.mock('@lytics/kero', () => ({
+  default: {
+    createLogger: () => ({
+      debug: vi.fn(),
+      error: vi.fn(),
+    }),
+  },
+}));
+
 import type { Config } from '@doc-agent/core';
-import { describe, expect, it } from 'vitest';
-import { extractDocument } from '../index';
+import { extractWithGemini } from '../providers/gemini';
+import { extractWithOllama } from '../providers/ollama';
+import { extractDocument } from '../extract';
 
 describe('extractDocument', () => {
-  it('should throw error for unsupported provider', async () => {
-    const fs = await import('node:fs');
-    const path = await import('node:path');
-    const os = await import('node:os');
-    const tmpDir = os.tmpdir();
-    const testFile = path.join(tmpDir, 'test-unsupported.pdf');
-    fs.writeFileSync(testFile, Buffer.from('test pdf content'));
+  beforeEach(() => {
+    vi.clearAllMocks();
+  });
+
+  it('should route to Gemini provider when aiProvider is gemini', async () => {
+    const mockResult = {
+      id: '123',
+      filename: 'test.pdf',
+      extractedAt: new Date(),
+      type: 'receipt' as const,
+    };
+    vi.mocked(extractWithGemini).mockResolvedValueOnce(mockResult);
 
     const config: Config = {
-      aiProvider: 'openai' as 'gemini' | 'openai' | 'ollama',
+      aiProvider: 'gemini',
+      geminiApiKey: 'test-key',
+    };
+
+    const result = await extractDocument('/path/to/test.pdf', config);
+
+    expect(extractWithGemini).toHaveBeenCalledWith(
+      '/path/to/test.pdf',
+      expect.any(String), // base64
+      config
+    );
+    expect(result).toEqual(mockResult);
+  });
+
+  it('should route to Ollama provider when aiProvider is ollama', async () => {
+    const mockResult = {
+      id: '456',
+      filename: 'invoice.pdf',
+      extractedAt: new Date(),
+      type: 'invoice' as const,
     };
+    vi.mocked(extractWithOllama).mockResolvedValueOnce(mockResult);
+
+    const config: Config = {
+      aiProvider: 'ollama',
+    };
+
+    const result = await extractDocument('/path/to/invoice.pdf', config);
+
+    expect(extractWithOllama).toHaveBeenCalledWith(
+      '/path/to/invoice.pdf',
+      expect.any(String), // base64
+      config,
+      0, // retryCount
+      undefined // onStream
+    );
+    expect(result).toEqual(mockResult);
+  });
+
+  it('should pass onStream callback to Ollama provider', async () => {
+    const mockResult = {
+      id: '789',
+      filename: 'doc.pdf',
+      extractedAt: new Date(),
+      type: 'other' as const,
+    };
+    vi.mocked(extractWithOllama).mockResolvedValueOnce(mockResult);
+
+    const config: Config = { aiProvider: 'ollama' };
+    const onStream = vi.fn();
+
+    await extractDocument('/path/to/doc.pdf', config, { onStream });
+
+    expect(extractWithOllama).toHaveBeenCalledWith(
+      '/path/to/doc.pdf',
+      expect.any(String),
+      config,
+      0,
+      onStream
+    );
+  });
+
+  it('should throw error for unsupported provider', async () => {
+    const config = { aiProvider: 'openai' } as Config;
+
+    await expect(extractDocument('/path/to/file.pdf', config)).rejects.toThrow(
+      'Provider openai not yet implemented'
+    );
+  });
+
+  it('should convert file to base64', async () => {
+    vi.mocked(extractWithGemini).mockResolvedValueOnce({
+      id: '1',
+      filename: 'test.pdf',
+      extractedAt: new Date(),
+      type: 'receipt' as const,
+    });
 
-    await expect(extractDocument(testFile, config)).rejects.toThrow('not yet implemented');
+    const config: Config = { aiProvider: 'gemini', geminiApiKey: 'key' };
+    await extractDocument('/path/to/test.pdf', config);
 
-    fs.unlinkSync(testFile);
+    // base64 of 'fake-file-content'
+    const expectedBase64 = Buffer.from('fake-file-content').toString('base64');
+    expect(extractWithGemini).toHaveBeenCalledWith(
+      '/path/to/test.pdf',
+      expectedBase64,
+      config
+    );
   });
 });
diff --git a/packages/extract/src/__tests__/mime.test.ts b/packages/extract/src/__tests__/mime.test.ts
new file mode 100644
index 0000000..a2c1a2c
--- /dev/null
+++ b/packages/extract/src/__tests__/mime.test.ts
@@ -0,0 +1,40 @@
+import { describe, expect, it } from 'vitest';
+import { getMimeType } from '../mime';
+
+describe('getMimeType', () => {
+  it('should return correct MIME type for PDF', () => {
+    expect(getMimeType('/path/to/file.pdf')).toBe('application/pdf');
+    expect(getMimeType('file.PDF')).toBe('application/pdf');
+  });
+
+  it('should return correct MIME type for PNG', () => {
+    expect(getMimeType('/path/to/image.png')).toBe('image/png');
+    expect(getMimeType('IMAGE.PNG')).toBe('image/png');
+  });
+
+  it('should return correct MIME type for JPEG', () => {
+    expect(getMimeType('photo.jpg')).toBe('image/jpeg');
+    expect(getMimeType('photo.jpeg')).toBe('image/jpeg');
+    expect(getMimeType('PHOTO.JPG')).toBe('image/jpeg');
+  });
+
+  it('should return correct MIME type for GIF', () => {
+    expect(getMimeType('animation.gif')).toBe('image/gif');
+  });
+
+  it('should return correct MIME type for WebP', () => {
+    expect(getMimeType('image.webp')).toBe('image/webp');
+  });
+
+  it('should default to application/pdf for unknown extensions', () => {
+    expect(getMimeType('file.txt')).toBe('application/pdf');
+    expect(getMimeType('file.doc')).toBe('application/pdf');
+    expect(getMimeType('file')).toBe('application/pdf');
+  });
+
+  it('should handle paths with multiple dots', () => {
+    expect(getMimeType('/path/to/my.file.name.pdf')).toBe('application/pdf');
+    expect(getMimeType('image.backup.png')).toBe('image/png');
+  });
+});
+
diff --git a/packages/extract/src/__tests__/ocr.test.ts b/packages/extract/src/__tests__/ocr.test.ts
index b970bd8..ce29abd 100644
--- a/packages/extract/src/__tests__/ocr.test.ts
+++ b/packages/extract/src/__tests__/ocr.test.ts
@@ -1,134 +1,134 @@
-import { readFileSync } from 'node:fs';
-import { resolve } from 'node:path';
-import { describe, expect, it, vi } from 'vitest';
+import { beforeEach, describe, expect, it, vi } from 'vitest';
+import { ocrImages } from '../ocr';
 
-// Mock tesseract.js to avoid worker issues in tests
+// Mock tesseract.js
 vi.mock('tesseract.js', () => ({
   default: {
-    recognize: vi.fn().mockResolvedValue({
-      data: {
-        text: 'Mocked OCR text\nTaqueria 10/10\n$5.99\n$4.49',
-      },
+    recognize: vi.fn(),
+  },
+}));
+
+// Mock kero
+vi.mock('@lytics/kero', () => ({
+  default: {
+    createLogger: () => ({
+      debug: vi.fn(),
+      error: vi.fn(),
     }),
   },
 }));
 
-describe('OCR Processing', () => {
-  // Skip if running in CI without the example file
-  const examplePath = resolve(__dirname, '../../../../examples/tacqueria-receipt.pdf');
-
-  it('should extract text from PDF using OCR', async () => {
-    // Import dynamically to avoid issues with tesseract worker
-    const { extractDocument } = await import('../index');
-
-    // Check if example file exists
-    let fileExists = false;
-    try {
-      readFileSync(examplePath);
-      fileExists = true;
-    } catch {
-      fileExists = false;
-    }
-
-    if (!fileExists) {
-      console.log('Skipping OCR test - example file not found');
-      return;
-    }
-
-    // Mock the Ollama API to return a simple response
-    const mockFetch = globalThis.fetch;
-    globalThis.fetch = async (url: string | URL | Request) => {
-      const urlStr = typeof url === 'string' ? url : url.toString();
-
-      if (urlStr.includes('localhost:11434')) {
-        return {
-          ok: true,
-          json: async () => ({
-            response: JSON.stringify({
-              type: 'receipt',
-              vendor: 'Taqueria 10/10',
-              amount: 22.4,
-              items: [{ description: 'Test Item', total: 5.99 }],
-            }),
-          }),
-          body: null,
-        } as Response;
-      }
-      return mockFetch(url as RequestInfo, undefined);
-    };
-
-    try {
-      const result = await extractDocument(examplePath, {
-        aiProvider: 'ollama',
-        ollamaModel: 'llama3.2-vision',
-      });
-
-      // Verify extraction completed
-      expect(result).toBeDefined();
-      expect(result.id).toBeDefined();
-      expect(result.filename).toBe('tacqueria-receipt.pdf');
-    } finally {
-      globalThis.fetch = mockFetch;
-    }
+import Tesseract from 'tesseract.js';
+
+describe('ocrImages', () => {
+  beforeEach(() => {
+    vi.clearAllMocks();
   });
 
-  it('should handle OCR errors gracefully', async () => {
-    const { extractDocument } = await import('../index');
-
-    // Create a mock that simulates OCR failure by using invalid image data
-    const mockFetch = globalThis.fetch;
-    globalThis.fetch = async (url: string | URL | Request) => {
-      const urlStr = typeof url === 'string' ? url : url.toString();
-
-      if (urlStr.includes('localhost:11434')) {
-        return {
-          ok: true,
-          json: async () => ({
-            response: JSON.stringify({
-              type: 'receipt',
-              vendor: 'Test',
-              amount: 10,
-            }),
-          }),
-          body: null,
-        } as Response;
-      }
-      return mockFetch(url as RequestInfo, undefined);
-    };
-
-    try {
-      // This should not throw even if OCR fails internally
-      // The extraction should proceed with whatever data is available
-      const result = await extractDocument(examplePath, {
-        aiProvider: 'ollama',
-        ollamaModel: 'llama3.2-vision',
-      });
-
-      expect(result).toBeDefined();
-    } finally {
-      globalThis.fetch = mockFetch;
-    }
+  it('should return empty string for empty images array', async () => {
+    const result = await ocrImages([]);
+    expect(result).toBe('');
   });
-});
 
-describe('getMimeType', () => {
-  it('should detect PDF mime type', async () => {
-    const { getMimeType } = await import('../index');
-    expect(getMimeType('test.pdf')).toBe('application/pdf');
-    expect(getMimeType('TEST.PDF')).toBe('application/pdf');
+  it('should process single image and return text', async () => {
+    const mockRecognize = vi.mocked(Tesseract.recognize);
+    mockRecognize.mockResolvedValueOnce({
+      data: { text: 'Hello World' },
+    } as Tesseract.RecognizeResult);
+
+    const imageBuffer = Buffer.from('fake-image-data');
+    const result = await ocrImages([imageBuffer]);
+
+    expect(result).toBe('--- Page 1 ---\nHello World');
+    expect(mockRecognize).toHaveBeenCalledWith(imageBuffer, 'eng', expect.any(Object));
   });
 
-  it('should detect image mime types', async () => {
-    const { getMimeType } = await import('../index');
-    expect(getMimeType('test.png')).toBe('image/png');
-    expect(getMimeType('test.jpg')).toBe('image/jpeg');
-    expect(getMimeType('test.jpeg')).toBe('image/jpeg');
-    expect(getMimeType('test.gif')).toBe('image/gif');
-    expect(getMimeType('test.webp')).toBe('image/webp');
+  it('should process multiple images in parallel', async () => {
+    const mockRecognize = vi.mocked(Tesseract.recognize);
+    mockRecognize
+      .mockResolvedValueOnce({ data: { text: 'Page 1 content' } } as Tesseract.RecognizeResult)
+      .mockResolvedValueOnce({ data: { text: 'Page 2 content' } } as Tesseract.RecognizeResult)
+      .mockResolvedValueOnce({ data: { text: 'Page 3 content' } } as Tesseract.RecognizeResult);
+
+    const images = [
+      Buffer.from('image1'),
+      Buffer.from('image2'),
+      Buffer.from('image3'),
+    ];
+    const result = await ocrImages(images);
+
+    expect(result).toContain('--- Page 1 ---');
+    expect(result).toContain('--- Page 2 ---');
+    expect(result).toContain('--- Page 3 ---');
+    expect(mockRecognize).toHaveBeenCalledTimes(3);
   });
 
-  it('should default to PDF for unknown extensions', async () => {
-    const { getMimeType } = await import('../index');
-    expect(getMimeType('test.unknown')).toBe('application/pdf');
+  it('should filter out empty pages', async () => {
+    const mockRecognize = vi.mocked(Tesseract.recognize);
+    mockRecognize
+      .mockResolvedValueOnce({ data: { text: 'Has content' } } as Tesseract.RecognizeResult)
+      .mockResolvedValueOnce({ data: { text: '   ' } } as Tesseract.RecognizeResult); // Whitespace only
+
+    const images = [Buffer.from('image1'), Buffer.from('image2')];
+    const result = await ocrImages(images);
+
+    expect(result).toBe('--- Page 1 ---\nHas content');
+    expect(result).not.toContain('Page 2');
+  });
+
+  it('should call progress callback during recognition', async () => {
+    const mockRecognize = vi.mocked(Tesseract.recognize);
+    let capturedLogger: ((m: { status: string; progress: number }) => void) | undefined;
+
+    mockRecognize.mockImplementation((_image, _lang, options) => {
+      capturedLogger = options?.logger as (m: { status: string; progress: number }) => void;
+      // Simulate progress callbacks
+      if (capturedLogger) {
+        capturedLogger({ status: 'recognizing text', progress: 0.5 });
+        capturedLogger({ status: 'recognizing text', progress: 1.0 });
+      }
+      return Promise.resolve({ data: { text: 'Result' } } as Tesseract.RecognizeResult);
+    });
+
+    const progressCallback = vi.fn();
+    const images = [Buffer.from('image1')];
+
+    await ocrImages(images, progressCallback);
+
+    expect(progressCallback).toHaveBeenCalledWith(1, 1, 0.5, 'recognizing text');
+    expect(progressCallback).toHaveBeenCalledWith(1, 1, 1.0, 'recognizing text');
+  });
+
+  it('should handle OCR errors gracefully for individual pages', async () => {
+    const mockRecognize = vi.mocked(Tesseract.recognize);
+    mockRecognize
+      .mockResolvedValueOnce({ data: { text: 'Good page' } } as Tesseract.RecognizeResult)
+      .mockRejectedValueOnce(new Error('OCR failed'));
+
+    const images = [Buffer.from('image1'), Buffer.from('image2')];
+    const result = await ocrImages(images);
+
+    // Should still return the successful page
+    expect(result).toBe('--- Page 1 ---\nGood page');
+  });
+
+  it('should ignore non-recognizing status in progress callback', async () => {
+    const mockRecognize = vi.mocked(Tesseract.recognize);
+
+    mockRecognize.mockImplementation((_image, _lang, options) => {
+      const logger = options?.logger as ((m: { status: string; progress: number }) => void) | undefined;
+      if (logger) {
+        logger({ status: 'loading tesseract core', progress: 0.5 }); // Should be ignored
+        logger({ status: 'recognizing text', progress: 1.0 }); // Should be called
+      }
+      return Promise.resolve({ data: { text: 'Result' } } as Tesseract.RecognizeResult);
+    });
+
+    const progressCallback = vi.fn();
+    await ocrImages([Buffer.from('image1')], progressCallback);
+
+    // Only called for 'recognizing text' status
+    expect(progressCallback).toHaveBeenCalledTimes(1);
+    expect(progressCallback).toHaveBeenCalledWith(1, 1, 1.0, 'recognizing text');
   });
 });
diff --git a/packages/extract/src/__tests__/pdf.test.ts b/packages/extract/src/__tests__/pdf.test.ts
new file mode 100644
index 0000000..426d30a
--- /dev/null
+++ b/packages/extract/src/__tests__/pdf.test.ts
@@ -0,0 +1,89 @@
+import { beforeEach, describe, expect, it, vi } from 'vitest';
+
+// Mock pdf-to-img
+vi.mock('pdf-to-img', () => ({
+  pdf: vi.fn(),
+}));
+
+// Mock kero
+vi.mock('@lytics/kero', () => ({
+  default: {
+    createLogger: () => ({
+      debug: vi.fn(),
+      error: vi.fn(),
+    }),
+  },
+}));
+
+import { pdf } from 'pdf-to-img';
+import { pdfToImages } from '../pdf';
+
+describe('pdfToImages', () => {
+  beforeEach(() => {
+    vi.clearAllMocks();
+  });
+
+  it('should convert PDF to array of image buffers', async () => {
+    const mockPdf = vi.mocked(pdf);
+    const mockPages = [
+      new Uint8Array([1, 2, 3]),
+      new Uint8Array([4, 5, 6]),
+    ];
+
+    // Stand-in for the pdf-to-img document: an async iterable yielding one Uint8Array per page
+    mockPdf.mockResolvedValueOnce({
+      [Symbol.asyncIterator]: async function* () {
+        for (const page of mockPages) {
+          yield page;
+        }
+      },
+    } as AsyncIterable<Uint8Array>);
+
+    const result = await pdfToImages('/path/to/test.pdf');
+
+    expect(result).toHaveLength(2);
+    expect(result![0]).toBeInstanceOf(Buffer);
+    expect(result![1]).toBeInstanceOf(Buffer);
+    expect(mockPdf).toHaveBeenCalledWith('/path/to/test.pdf', { scale: 2 }); // NOTE(review): commit message says pdf.ts uses scale 3 — confirm this expectation
+  });
+
+  it('should return null for empty PDF', async () => {
+    const mockPdf = vi.mocked(pdf);
+
+    mockPdf.mockResolvedValueOnce({
+      [Symbol.asyncIterator]: async function* () {
+        // No pages
+      },
+    } as AsyncIterable<Uint8Array>);
+
+    const result = await pdfToImages('/path/to/empty.pdf');
+
+    expect(result).toBeNull();
+  });
+
+  it('should return null on PDF conversion error', async () => {
+    const mockPdf = vi.mocked(pdf);
+    mockPdf.mockRejectedValueOnce(new Error('Invalid PDF'));
+
+    const result = await pdfToImages('/path/to/invalid.pdf');
+
+    expect(result).toBeNull();
+  });
+
+  it('should handle single page PDF', async () => {
+    const mockPdf = vi.mocked(pdf);
+    const mockPage = new Uint8Array([1, 2, 3, 4, 5]);
+
+    mockPdf.mockResolvedValueOnce({
+      [Symbol.asyncIterator]: async function* () {
+        yield mockPage;
+      },
+    } as AsyncIterable<Uint8Array>);
+
+    const result = await pdfToImages('/path/to/single-page.pdf');
+
+    expect(result).toHaveLength(1);
+    expect(Buffer.from(result![0])).toEqual(Buffer.from(mockPage));
+  });
+});
+
diff --git a/packages/extract/src/__tests__/schemas.test.ts b/packages/extract/src/__tests__/schemas.test.ts
new file mode 100644
index 0000000..2980dd8
--- /dev/null
+++ b/packages/extract/src/__tests__/schemas.test.ts
@@ -0,0 +1,132 @@
+import { describe, expect, it } from 'vitest';
+import { DocumentDataSchema, LineItemSchema } from '../schemas';
+
+describe('LineItemSchema', () => {
+  it('should parse valid line item', () => {
+    const result = LineItemSchema.parse({
+      description: 'Coffee',
+      quantity: 2,
+      unitPrice: 3.50,
+      total: 7.00,
+    });
+
+    expect(result).toEqual({
+      description: 'Coffee',
+      quantity: 2,
+      unitPrice: 3.50,
+      total: 7.00,
+    });
+  });
+
+  it('should coerce string numbers to numbers', () => {
+    const result = LineItemSchema.parse({
+      description: 'Item',
+      quantity: '2',
+      unitPrice: '3.50',
+      total: '7.00',
+    });
+
+    expect(result.quantity).toBe(2);
+    expect(result.unitPrice).toBe(3.50);
+    expect(result.total).toBe(7.00);
+  });
+
+  it('should normalize price to total', () => {
+    const result = LineItemSchema.parse({
+      description: 'Item',
+      price: 9.99, // Some models output "price" instead of "total"
+    });
+
+    expect(result.total).toBe(9.99);
+  });
+
+  it('should prefer total over price when both present', () => {
+    const result = LineItemSchema.parse({
+      description: 'Item',
+      total: 10.00,
+      price: 5.00,
+    });
+
+    expect(result.total).toBe(10.00);
+  });
+
+  it('should handle missing optional fields', () => {
+    const result = LineItemSchema.parse({
+      description: 'Simple item',
+    });
+
+    expect(result).toEqual({
+      description: 'Simple item',
+      quantity: undefined,
+      unitPrice: undefined,
+      total: undefined,
+    });
+  });
+});
+
+describe('DocumentDataSchema', () => {
+  it('should parse valid document data', () => {
+    const result = DocumentDataSchema.parse({
+      type: 'receipt',
+      vendor: 'Coffee Shop',
+      amount: 15.99,
+      date: '2024-01-15',
+      items: [{ description: 'Latte', total: 5.99 }],
+    });
+
+    expect(result.type).toBe('receipt');
+    expect(result.vendor).toBe('Coffee Shop');
+    expect(result.amount).toBe(15.99);
+  });
+
+  it('should default type to other when missing', () => {
+    const result = DocumentDataSchema.parse({
+      vendor: 'Some Place',
+    });
+
+    expect(result.type).toBe('other');
+  });
+
+  it('should catch invalid type and default to other', () => {
+    const result = DocumentDataSchema.parse({
+      type: 'invalid_type',
+      vendor: 'Place',
+    });
+
+    expect(result.type).toBe('other');
+  });
+
+  it('should transform null to undefined', () => {
+    const result = DocumentDataSchema.parse({
+      type: 'invoice',
+      vendor: null,
+      amount: null,
+      date: null,
+      items: null,
+    });
+
+    expect(result.vendor).toBeUndefined();
+    expect(result.amount).toBeUndefined();
+    expect(result.date).toBeUndefined();
+    expect(result.items).toBeUndefined();
+  });
+
+  it('should coerce string amounts to numbers', () => {
+    const result = DocumentDataSchema.parse({
+      type: 'receipt',
+      amount: '99.99',
+    });
+
+    expect(result.amount).toBe(99.99);
+  });
+
+  it('should parse all valid document types', () => {
+    const types = ['invoice', 'receipt', 'bank_statement', 'other'] as const;
+
+    for (const type of types) {
+      const result = DocumentDataSchema.parse({ type });
+      expect(result.type).toBe(type);
+    }
+  });
+});
+
diff --git a/packages/extract/src/extract.ts b/packages/extract/src/extract.ts
new file mode 100644
index 0000000..baf4db2
--- /dev/null
+++ b/packages/extract/src/extract.ts
@@ -0,0 +1,35 @@
+import { readFileSync } from 'node:fs';
+import type { Config, DocumentData } from '@doc-agent/core';
+import { extractWithGemini } from './providers/gemini';
+import { extractWithOllama } from './providers/ollama';
+import type { ExtractOptions } from './types';
+
+/**
+ * Extract structured data from a document (PDF or image): emits a `log` chunk to `options.onStream`,
+ * reads and base64-encodes `filePath`, then routes on `config.aiProvider` (gemini/ollama; others throw).
+ */
+export async function extractDocument(
+  filePath: string,
+  config: Config,
+  options?: ExtractOptions
+): Promise<DocumentData> {
+  options?.onStream?.({
+    type: 'log',
+    level: 'info',
+    message: `Starting extraction with ${config.aiProvider}`,
+    data: { filePath, provider: config.aiProvider },
+  });
+
+  const fileBuffer = readFileSync(filePath); // synchronous read of the whole file
+  const base64 = fileBuffer.toString('base64');
+
+  if (config.aiProvider === 'gemini') {
+    return extractWithGemini(filePath, base64, config);
+  }
+
+  if (config.aiProvider === 'ollama') {
+    return extractWithOllama(filePath, base64, config, 0, options?.onStream); // 0 = initial retryCount
+  }
+
+  throw new Error(`Provider ${config.aiProvider} not yet implemented`);
+}
diff --git a/packages/extract/src/index.ts b/packages/extract/src/index.ts
index f5be816..1dfa3fc 100644
--- a/packages/extract/src/index.ts
+++ b/packages/extract/src/index.ts
@@ -1,335 +1,21 @@
-import { readFileSync } from 'node:fs';
-import { extname } from 'node:path';
-import type { Config, DocumentData } from '@doc-agent/core';
-import { GoogleGenerativeAI } from '@google/generative-ai';
-import { pdf } from 'pdf-to-img';
-import Tesseract from 'tesseract.js';
-import { z } from 'zod';
-
-// Zod schema for DocumentData validation (lenient to handle model variations)
-// Use coerce to handle strings like "22.40" -> 22.40
-const LineItemSchema = z
-  .object({
-    description: z.string(),
-    quantity: z.coerce.number().optional(),
-    unitPrice: z.coerce.number().optional(),
-    total: z.coerce.number().optional(),
-    price: z.coerce.number().optional(), // Some models output "price" instead of "total"
-  })
-  .transform((item) => ({
-    description: item.description,
-    quantity: item.quantity,
-    unitPrice: item.unitPrice,
-    total: item.total ?? item.price, // Normalize price -> total
-  }));
-
-const DocumentDataSchema = z.object({
-  // Default to 'other' if type is missing or invalid
-  type: z.enum(['invoice', 'receipt', 'bank_statement', 'other']).default('other').catch('other'),
-  vendor: z
-    .string()
-    .nullish()
-    .transform((v) => v ?? undefined),
-  amount: z.coerce
-    .number()
-    .nullish()
-    .transform((v) => v ?? undefined),
-  date: z
-    .string()
-    .nullish()
-    .transform((v) => v ?? undefined),
-  items: z
-    .array(LineItemSchema)
-    .nullish()
-    .transform((v) => v ?? undefined),
-  rawText: z
-    .string()
-    .nullish()
-    .transform((v) => v ?? undefined),
-});
-
-// Helper to detect MIME type from file extension
-export function getMimeType(filePath: string): string {
-  const ext = extname(filePath).toLowerCase();
-  const mimeTypes: Record<string, string> = {
-    '.pdf': 'application/pdf',
-    '.png': 'image/png',
-    '.jpg': 'image/jpeg',
-    '.jpeg': 'image/jpeg',
-    '.gif': 'image/gif',
-    '.webp': 'image/webp',
-  };
-  return mimeTypes[ext] || 'application/pdf';
-}
-
-// Convert PDF to PNG images (all pages) for vision models that don't support PDF
-// Returns array of base64 images, or null if conversion fails
-async function pdfToImages(filePath: string): Promise<Buffer[] | null> {
-  try {
-    const document = await pdf(filePath, { scale: 2 });
-    const pages: Buffer[] = [];
-
-    for await (const page of document) {
-      pages.push(Buffer.from(page));
-    }
-
-    return pages.length > 0 ? pages : null;
-  } catch {
-    // Invalid PDF or other error
-    return null;
-  }
-}
-
-// OCR all images in parallel using tesseract.js
-// Returns concatenated text with page markers
-async function ocrImages(images: Buffer[]): Promise<string> {
-  if (images.length === 0) return '';
-
-  try {
-    // Process all pages in parallel
-    const results = await Promise.all(
-      images.map(async (image, index) => {
-        try {
-          const result = await Tesseract.recognize(image, 'eng', {
-            logger: () => {}, // Silent
-          });
-          return { page: index + 1, text: result.data.text };
-        } catch {
-          return { page: index + 1, text: '' };
-        }
-      })
-    );
-
-    // Concatenate with page markers
-    return results
-      .filter((r) => r.text.trim())
-      .map((r) => `--- Page ${r.page} ---\n${r.text.trim()}`)
-      .join('\n\n');
-  } catch {
-    return '';
-  }
-}
-
-export type StreamChunk =
-  | { type: 'prompt'; content: string }
-  | { type: 'response'; content: string };
-
-export type StreamCallback = (chunk: StreamChunk) => void;
-
-export interface ExtractOptions {
-  onStream?: StreamCallback;
-}
-
-export async function extractDocument(
-  filePath: string,
-  config: Config,
-  options?: ExtractOptions
-): Promise<DocumentData> {
-  const fileBuffer = readFileSync(filePath);
-  const base64 = fileBuffer.toString('base64');
-
-  if (config.aiProvider === 'gemini') {
-    return extractWithGemini(filePath, base64, config);
-  }
-
-  if (config.aiProvider === 'ollama') {
-    return extractWithOllama(filePath, base64, config, 0, options?.onStream);
-  }
-
-  throw new Error(`Provider ${config.aiProvider} not yet implemented`);
-}
-
-async function extractWithGemini(
-  filePath: string,
-  base64: string,
-  config: Config
-): Promise<DocumentData> {
-  if (!config.geminiApiKey) {
-    throw new Error('Gemini API key required. Set GEMINI_API_KEY env variable.');
-  }
-
-  const genai = new GoogleGenerativeAI(config.geminiApiKey);
-  const modelName = config.geminiModel || 'gemini-2.5-flash';
-  const model = genai.getGenerativeModel({ model: modelName });
-
-  const prompt = `Extract structured data from this document as JSON:
-{
-  "type": "invoice" | "receipt" | "bank_statement",
-  "vendor": "company name",
-  "amount": total_amount_number,
-  "date": "YYYY-MM-DD",
-  "items": [{"description": "...", "total": number}]
-}
-
-Only respond with valid JSON, no markdown formatting.`;
-
-  const result = await model.generateContent([
-    prompt,
-    {
-      inlineData: {
-        data: base64,
-        mimeType: 'application/pdf',
-      },
-    },
-  ]);
-
-  const text = result.response.text();
-  const extracted = JSON.parse(text.replace(/```json\n?/g, '').replace(/```\n?/g, ''));
-
-  return {
-    id: crypto.randomUUID(),
-    filename: filePath.split('/').pop() || 'unknown',
-    extractedAt: new Date(),
-    ...extracted,
-  };
-}
-
-async function extractWithOllama(
-  filePath: string,
-  base64: string,
-  config: Config,
-  retryCount = 0,
-  onStream?: StreamCallback
-): Promise<DocumentData> {
-  const model = config.ollamaModel || 'llama3.2-vision';
-  const mimeType = getMimeType(filePath);
-
-  // Ollama vision models don't support PDF - convert to images first
-  let imageBase64 = base64;
-  let ocrText = '';
-
-  if (mimeType === 'application/pdf') {
-    const pages = await pdfToImages(filePath);
-    if (pages && pages.length > 0) {
-      // Use first page for vision model
-      imageBase64 = pages[0].toString('base64');
-
-      // OCR all pages in parallel for text reference
-      if (onStream) {
-        onStream({ type: 'prompt', content: `Running OCR on ${pages.length} page(s)...` });
-      }
-      ocrText = await ocrImages(pages);
-    }
-  } else {
-    // For images, OCR the single image
-    const imageBuffer = Buffer.from(base64, 'base64');
-    ocrText = await ocrImages([imageBuffer]);
-  }
-
-  const systemPrompt = `Extract receipt/invoice data as JSON.
-
-Schema:
-{"type":"receipt"|"invoice"|"bank_statement"|"other","vendor":"string","amount":number,"date":"YYYY-MM-DD","items":[{"description":"string","total":number}]}
-
-Rules:
-- amount = final total paid
-- items = only purchased items (not tax/fees/service charges)
-- date in YYYY-MM-DD format
-- Use the OCR text below as the primary source for text and numbers
-- The image is for layout context only`;
-
-  // Include OCR text in the user prompt if available
-  const userPrompt = ocrText
-    ? `OCR Text (use this for accurate text/numbers):\n${ocrText}\n\nExtract structured data from this document.`
-    : `Extract structured data from this ${mimeType.includes('image') ? 'image' : 'document'}.`;
-
-  try {
-    const shouldStream = !!onStream;
-
-    // Emit full prompts so user can see what we're asking
-    if (onStream) {
-      onStream({
-        type: 'prompt',
-        content: `System:\n${systemPrompt}\n\nUser:\n${userPrompt}`,
-      });
-    }
-
-    const response = await fetch('http://localhost:11434/api/generate', {
-      method: 'POST',
-      headers: {
-        'Content-Type': 'application/json',
-      },
-      body: JSON.stringify({
-        model,
-        prompt: userPrompt,
-        system: systemPrompt,
-        images: [imageBase64],
-        stream: shouldStream,
-        format: 'json', // Force valid JSON output
-      }),
-    });
-
-    if (!response.ok) {
-      const errorText = await response.text();
-      throw new Error(`Ollama API error: ${response.status} ${errorText}`);
-    }
-
-    let fullResponse = '';
-
-    if (shouldStream && response.body) {
-      // Stream the response
-      const reader = response.body.getReader();
-      const decoder = new TextDecoder();
-
-      while (true) {
-        const { done, value } = await reader.read();
-        if (done) break;
-
-        const chunk = decoder.decode(value, { stream: true });
-        // Ollama streams newline-delimited JSON
-        for (const line of chunk.split('\n').filter(Boolean)) {
-          try {
-            const json = JSON.parse(line) as { response?: string; done?: boolean };
-            if (json.response) {
-              fullResponse += json.response;
-              onStream({ type: 'response', content: json.response });
-            }
-          } catch {
-            // Ignore parse errors for partial lines
-          }
-        }
-      }
-    } else {
-      // Non-streaming response
-      const data = (await response.json()) as { response: string };
-      fullResponse = data.response;
-    }
-
-    let parsed: unknown;
-
-    try {
-      // With format: 'json', Ollama should return valid JSON directly
-      parsed = JSON.parse(fullResponse.trim());
-    } catch (_parseError) {
-      // Fallback: try to extract JSON from response
-      const jsonStart = fullResponse.indexOf('{');
-      const jsonEnd = fullResponse.lastIndexOf('}');
-      if (jsonStart !== -1 && jsonEnd !== -1) {
-        try {
-          parsed = JSON.parse(fullResponse.slice(jsonStart, jsonEnd + 1));
-        } catch {
-          throw new Error(`Failed to parse JSON response: ${fullResponse}`);
-        }
-      } else {
-        throw new Error(`Failed to parse JSON response: ${fullResponse}`);
-      }
-    }
-
-    // Validate with Zod
-    const validated = DocumentDataSchema.parse(parsed);
-
-    // Build complete DocumentData object
-    return {
-      id: crypto.randomUUID(),
-      filename: filePath.split('/').pop() || 'unknown',
-      extractedAt: new Date(),
-      ...validated,
-    };
-  } catch (error) {
-    // Retry once on validation failure (without streaming for retry)
-    if (retryCount === 0 && error instanceof z.ZodError) {
-      return extractWithOllama(filePath, base64, config, 1, undefined);
-    }
-    throw error;
-  }
-}
+/**
+ * @doc-agent/extract
+ *
+ * Document extraction module for PDFs and images using Vision AI.
+ * Supports Ollama (local) and Gemini (cloud) providers.
+ */
+
+// Main extraction function
+export { extractDocument } from './extract';
+
+// Types
+export type { ExtractOptions, LogLevel, OcrProgressCallback, StreamCallback, StreamChunk } from './types';
+
+// Utilities (for testing/advanced usage)
+export { getMimeType } from './mime';
+export { ocrImages } from './ocr';
+export { pdfToImages } from './pdf';
+export { DocumentDataSchema, LineItemSchema } from './schemas';
+
+// Providers (for direct access if needed)
+export { extractWithGemini, extractWithOllama } from './providers';
diff --git a/packages/extract/src/mime.ts b/packages/extract/src/mime.ts
new file mode 100644
index 0000000..4b7d293
--- /dev/null
+++ b/packages/extract/src/mime.ts
@@ -0,0 +1,23 @@
+import { extname } from 'node:path';
+
+/**
+ * Supported file extensions (lowercase, with leading dot) and their MIME types
+ */
+const MIME_TYPES: Record<string, string> = {
+  '.pdf': 'application/pdf',
+  '.png': 'image/png',
+  '.jpg': 'image/jpeg',
+  '.jpeg': 'image/jpeg',
+  '.gif': 'image/gif',
+  '.webp': 'image/webp',
+};
+
+/**
+ * Look up the MIME type for a path by its extension, ignoring case.
+ * Paths with an unknown or missing extension are treated as 'application/pdf'.
+ */
+export function getMimeType(filePath: string): string {
+  const known = MIME_TYPES[extname(filePath).toLowerCase()];
+  return known ? known : 'application/pdf';
+}
+
diff --git a/packages/extract/src/ocr.ts b/packages/extract/src/ocr.ts
new file mode 100644
index 0000000..21268dd
--- /dev/null
+++ b/packages/extract/src/ocr.ts
@@ -0,0 +1,72 @@
+import Tesseract from 'tesseract.js';
+import type { LogLevel, OcrProgressCallback, StreamCallback } from './types';
+
+/**
+ * Forward a structured log entry to the stream callback; a no-op without a callback
+ */
+function emitLog(
+  onStream: StreamCallback | undefined,
+  level: LogLevel,
+  message: string,
+  data?: Record<string, unknown>
+): void {
+  if (onStream) onStream({ type: 'log', level, message, data });
+}
+
+/**
+ * Run tesseract.js over every image concurrently and merge the recognized text.
+ * Pages that fail or contain only whitespace are left out; failures are logged via onStream.
+ * @returns Page texts joined as `--- Page N ---` sections, or '' when nothing was recognized
+ */
+export async function ocrImages(
+  images: Buffer[],
+  onProgress?: OcrProgressCallback,
+  onStream?: StreamCallback
+): Promise<string> {
+  const totalPages = images.length;
+  if (totalPages === 0) return '';
+
+  // Recognize a single page; a failure is logged and reported as empty text
+  const recognizePage = async (image: Buffer, pageNum: number) => {
+    try {
+      const { data } = await Tesseract.recognize(image, 'eng', {
+        logger: (m) => {
+          if (!onProgress || m.status !== 'recognizing text') return;
+          onProgress(pageNum, totalPages, m.progress, m.status);
+        },
+      });
+      emitLog(onStream, 'debug', `OCR completed for page ${pageNum}`, {
+        page: pageNum,
+        textLength: data.text.length,
+      });
+      return { page: pageNum, text: data.text };
+    } catch (error) {
+      emitLog(onStream, 'error', `OCR failed for page ${pageNum}`, {
+        page: pageNum,
+        error: String(error),
+      });
+      return { page: pageNum, text: '' };
+    }
+  };
+
+  try {
+    // Start every page at once; Promise.all returns results in input order
+    const results = await Promise.all(images.map((image, index) => recognizePage(image, index + 1)));
+    // Keep pages with visible text, each under its own page marker
+    const sections: string[] = [];
+    for (const { page, text } of results) {
+      const trimmed = text.trim();
+      if (trimmed) sections.push(`--- Page ${page} ---\n${trimmed}`);
+    }
+    const ocrText = sections.join('\n\n');
+
+    emitLog(onStream, 'info', `OCR complete: ${totalPages} pages, ${ocrText.length} chars`, {
+      totalPages,
+      totalTextLength: ocrText.length,
+    });
+    return ocrText;
+  } catch (error) {
+    emitLog(onStream, 'error', 'OCR batch failed', { error: String(error) });
+    return '';
+  }
+}
diff --git a/packages/extract/src/pdf.ts b/packages/extract/src/pdf.ts
new file mode 100644
index 0000000..ce4aff9
--- /dev/null
+++ b/packages/extract/src/pdf.ts
@@ -0,0 +1,45 @@
+import { pdf } from 'pdf-to-img';
+import type { LogLevel, StreamCallback } from './types';
+
+/**
+ * Forward a structured log entry to the stream callback; a no-op without a callback
+ */
+function emitLog(
+  onStream: StreamCallback | undefined,
+  level: LogLevel,
+  message: string,
+  data?: Record<string, unknown>
+): void {
+  if (onStream) onStream({ type: 'log', level, message, data });
+}
+
+/**
+ * Render each page of a PDF as a PNG image for vision models that cannot read PDFs.
+ * Conversion errors are logged through onStream rather than thrown.
+ * @returns One buffer per page, or null if conversion fails or yields no pages
+ */
+export async function pdfToImages(
+  filePath: string,
+  onStream?: StreamCallback
+): Promise<Buffer[] | null> {
+  try {
+    const rendered: Buffer[] = [];
+    // Scale 3 balances OCR quality against image size (higher scale = better OCR quality)
+    const pageStream = await pdf(filePath, { scale: 3 });
+    for await (const pageImage of pageStream) {
+      rendered.push(Buffer.from(pageImage));
+    }
+
+    const pageCount = rendered.length;
+    emitLog(onStream, 'debug', `PDF converted: ${pageCount} pages`, {
+      filePath,
+      pageCount,
+    });
+    if (pageCount === 0) return null;
+    return rendered;
+  } catch (error) {
+    const details = { filePath, error: String(error) };
+    emitLog(onStream, 'error', 'PDF conversion failed', details);
+    return null;
+  }
+}
diff --git a/packages/extract/src/providers/gemini.ts b/packages/extract/src/providers/gemini.ts
new file mode 100644
index 0000000..7faf3db
--- /dev/null
+++ b/packages/extract/src/providers/gemini.ts
@@ -0,0 +1,51 @@
+import type { Config, DocumentData } from '@doc-agent/core';
+import { GoogleGenerativeAI } from '@google/generative-ai';
+
+/**
+ * Extract document data using Google Gemini Vision API.
+ * The inline MIME type follows the file extension; unknown extensions are sent as PDF.
+ * @throws Error when no API key is configured; SyntaxError when the reply is not valid JSON
+ */
+export async function extractWithGemini(
+  filePath: string,
+  base64: string,
+  config: Config
+): Promise<DocumentData> {
+  if (!config.geminiApiKey) {
+    throw new Error('Gemini API key required. Set GEMINI_API_KEY env variable.');
+  }
+
+  const genai = new GoogleGenerativeAI(config.geminiApiKey);
+  const model = genai.getGenerativeModel({ model: config.geminiModel || 'gemini-2.5-flash' });
+
+  // Label the payload with its real type: image bytes must not be declared as application/pdf
+  const ext = /\.([a-z0-9]+)$/i.exec(filePath)?.[1]?.toLowerCase() ?? '';
+  const imageExt = ext === 'jpg' ? 'jpeg' : ext;
+  const mimeType = ['png', 'jpeg', 'gif', 'webp'].includes(imageExt) ? `image/${imageExt}` : 'application/pdf';
+
+  const prompt = `Extract structured data from this document as JSON:
+{
+  "type": "invoice" | "receipt" | "bank_statement",
+  "vendor": "company name",
+  "amount": total_amount_number,
+  "date": "YYYY-MM-DD",
+  "items": [{"description": "...", "total": number}]
+}
+
+Only respond with valid JSON, no markdown formatting.`;
+
+  const result = await model.generateContent([prompt, { inlineData: { data: base64, mimeType } }]);
+
+  // Strip any markdown code fences the model added despite the instruction
+  const text = result.response.text();
+  const extracted = JSON.parse(text.replace(/```json\n?/g, '').replace(/```\n?/g, ''));
+
+  // Spread the model output first so it can never overwrite the generated metadata
+  return {
+    ...extracted,
+    id: crypto.randomUUID(),
+    filename: filePath.split('/').pop() || 'unknown',
+    extractedAt: new Date(),
+  };
+}
+
diff --git a/packages/extract/src/providers/index.ts b/packages/extract/src/providers/index.ts
new file mode 100644
index 0000000..d17019d
--- /dev/null
+++ b/packages/extract/src/providers/index.ts
@@ -0,0 +1,3 @@
+export { extractWithGemini } from './gemini';
+export { extractWithOllama } from './ollama';
+
diff --git a/packages/extract/src/providers/ollama.ts b/packages/extract/src/providers/ollama.ts
new file mode 100644
index 0000000..571495e
--- /dev/null
+++ b/packages/extract/src/providers/ollama.ts
@@ -0,0 +1,259 @@
+import type { Config, DocumentData } from '@doc-agent/core';
+import { z } from 'zod';
+import { getMimeType } from '../mime';
+import { ocrImages } from '../ocr';
+import { pdfToImages } from '../pdf';
+import { DocumentDataSchema } from '../schemas';
+import type { LogLevel, OcrProgressCallback, StreamCallback } from '../types';
+
+/**
+ * Forward a structured log entry to the stream callback; a no-op without a callback
+ */
+function emitLog(
+  onStream: StreamCallback | undefined,
+  level: LogLevel,
+  message: string,
+  data?: Record<string, unknown>
+): void {
+  if (onStream) onStream({ type: 'log', level, message, data });
+}
+
+/**
+ * Extract document data using Ollama local vision model.
+ * PDFs are converted to images (the first page is sent to the model) and OCR text is added to the prompt.
+ * @param retryCount 0 on the first attempt; a single non-streaming retry follows a Zod validation failure
+ * @param onStream optional sink for prompt / response / log chunks; when set, the reply is streamed
+ * @returns the validated fields plus a generated id, filename and extractedAt
+ * @throws Error on a non-OK HTTP status or unparseable reply; ZodError when the retry also fails validation
+ */
+export async function extractWithOllama(
+  filePath: string,
+  base64: string,
+  config: Config,
+  retryCount = 0,
+  onStream?: StreamCallback
+): Promise<DocumentData> {
+  const model = config.ollamaModel || 'llama3.2-vision';
+  const mimeType = getMimeType(filePath);
+
+  // Ollama vision models don't support PDF - convert to images first
+  let imageBase64 = base64;
+  let ocrText = '';
+
+  // Track OCR progress per page (for parallel processing)
+  const ocrProgress = new Map<number, number>();
+  const formatOcrProgress = (totalPages: number): string => {
+    const pages = Array.from(ocrProgress.entries())
+      .sort(([a], [b]) => a - b)
+      .map(([page, pct]) => `p${page}:${pct}%`)
+      .join(' ');
+    return `OCR (${ocrProgress.size}/${totalPages}): ${pages}`;
+  };
+
+  const ocrProgressCallback: OcrProgressCallback | undefined = onStream
+    ? (page, totalPages, progress, _status) => {
+        const pct = Math.round(progress * 100);
+        ocrProgress.set(page, pct);
+        onStream({ type: 'prompt', content: formatOcrProgress(totalPages) });
+      }
+    : undefined;
+
+  if (mimeType === 'application/pdf') {
+    emitLog(onStream, 'info', `Converting PDF to images`, { filePath });
+    const pages = await pdfToImages(filePath, onStream);
+    if (pages && pages.length > 0) {
+      // Use first page for vision model
+      imageBase64 = pages[0].toString('base64');
+      emitLog(onStream, 'debug', `PDF converted`, {
+        pageCount: pages.length,
+        firstPageSize: `${Math.round(pages[0].length / 1024)}KB`,
+      });
+
+      // OCR all pages in parallel for text reference
+      emitLog(onStream, 'info', `Running OCR on ${pages.length} page(s)`, { pageCount: pages.length });
+      onStream?.({ type: 'prompt', content: `Running OCR on ${pages.length} page(s)...` });
+      ocrText = await ocrImages(pages, ocrProgressCallback, onStream);
+    }
+  } else {
+    // For images, OCR the single image
+    const imageBuffer = Buffer.from(base64, 'base64');
+    ocrText = await ocrImages([imageBuffer], ocrProgressCallback, onStream);
+  }
+
+  const systemPrompt = `Extract document data as JSON.
+
+Schema:
+{"type":"receipt"|"invoice"|"bank_statement"|"other","vendor":"string","amount":number,"date":"YYYY-MM-DD","items":[{"description":"string","total":number}]}
+
+Classification:
+- receipt = purchase from store/restaurant (has items, subtotal, tax, total)
+- invoice = bill for services/goods (has invoice number, amount due)
+- bank_statement = bank account transactions (has account number, balance)
+- other = none of the above
+
+Amount rules by type:
+- receipt: subtotal + tax (IGNORE payment lines like "Credit", "Cash", "Card")
+- invoice: "Amount Due" or "Total Due" or "Balance Due"
+- bank_statement: ending balance (can be positive or negative)
+- other: the main total amount shown
+
+General rules:
+- items = line items (products, services, transactions)
+- date in YYYY-MM-DD format
+- Use the OCR text below as the primary source for text and numbers
+- The image is for layout context only`;
+
+  // Include OCR text in the user prompt if available
+  const userPrompt = ocrText
+    ? `OCR Text (use this for accurate text/numbers):\n${ocrText}\n\nExtract structured data from this document.`
+    : `Extract structured data from this ${mimeType.includes('image') ? 'image' : 'document'}.`;
+
+  try {
+    const shouldStream = !!onStream;
+
+    emitLog(onStream, 'info', `Starting extraction with ollama`, {
+      filePath,
+      provider: 'ollama',
+    });
+
+    // Log OCR text preview for debugging
+    if (ocrText) {
+      emitLog(onStream, 'debug', `OCR text preview (first 200 chars)`, {
+        preview: ocrText.slice(0, 200).replace(/\n/g, ' '),
+        totalLength: ocrText.length,
+      });
+    }
+
+    // Emit full prompts so user can see what we're asking
+    onStream?.({ type: 'prompt', content: `System:\n${systemPrompt}\n\nUser:\n${userPrompt}` });
+
+    emitLog(onStream, 'debug', `Sending request to Ollama`, {
+      model,
+      promptLength: userPrompt.length,
+      hasImage: true,
+    });
+
+    const response = await fetch('http://localhost:11434/api/generate', {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify({
+        model,
+        prompt: userPrompt,
+        system: systemPrompt,
+        images: [imageBase64],
+        stream: shouldStream,
+        format: 'json', // Force valid JSON output
+      }),
+    });
+
+    if (!response.ok) {
+      const errorText = await response.text();
+      emitLog(onStream, 'error', `Ollama API error: ${response.status}`, {
+        status: response.status,
+        error: errorText,
+      });
+      throw new Error(`Ollama API error: ${response.status} ${errorText}`);
+    }
+
+    let fullResponse = '';
+
+    if (shouldStream && response.body) {
+      // Stream the response
+      const reader = response.body.getReader();
+      const decoder = new TextDecoder();
+      // Holds an incomplete trailing line until the rest of it arrives in a later chunk
+      let pending = '';
+
+      while (true) {
+        const { done, value } = await reader.read();
+        // Flush the decoder on the final read so buffered bytes are not lost
+        pending += done ? decoder.decode() : decoder.decode(value, { stream: true });
+        // Ollama streams newline-delimited JSON; a chunk can end mid-line, so keep the tail
+        const lines = pending.split('\n');
+        pending = done ? '' : (lines.pop() ?? '');
+        for (const line of lines.filter(Boolean)) {
+          try {
+            const json = JSON.parse(line) as { response?: string; done?: boolean };
+            if (json.response) {
+              fullResponse += json.response;
+              onStream({ type: 'response', content: json.response });
+            }
+          } catch {
+            // Skip malformed lines rather than aborting the stream
+          }
+        }
+        if (done) break;
+      }
+    } else {
+      // Non-streaming response
+      const data = (await response.json()) as { response: string };
+      fullResponse = data.response;
+    }
+
+    let parsed: unknown;
+
+    emitLog(onStream, 'debug', `Model response received`, {
+      responseLength: fullResponse.length,
+      preview: fullResponse.slice(0, 100),
+    });
+
+    try {
+      // With format: 'json', Ollama should return valid JSON directly
+      parsed = JSON.parse(fullResponse.trim());
+    } catch (_parseError) {
+      // Fallback: try to extract JSON from response
+      const jsonStart = fullResponse.indexOf('{');
+      const jsonEnd = fullResponse.lastIndexOf('}');
+      if (jsonStart !== -1 && jsonEnd !== -1) {
+        try {
+          parsed = JSON.parse(fullResponse.slice(jsonStart, jsonEnd + 1));
+        } catch {
+          emitLog(onStream, 'error', `JSON parse failed`, { response: fullResponse });
+          throw new Error(`Failed to parse JSON response: ${fullResponse}`);
+        }
+      } else {
+        emitLog(onStream, 'error', `No JSON found in response`, { response: fullResponse });
+        throw new Error(`Failed to parse JSON response: ${fullResponse}`);
+      }
+    }
+
+    // JSON.parse can yield null or a primitive; fall back to {} so this log cannot throw
+    const raw = (parsed !== null && typeof parsed === 'object' ? parsed : {}) as Record<string, unknown>;
+    emitLog(onStream, 'debug', `Raw parsed JSON`, {
+      type: raw.type,
+      vendor: raw.vendor,
+      amount: raw.amount,
+      itemCount: Array.isArray(raw.items) ? raw.items.length : 0,
+    });
+
+    // Validate with Zod
+    const validated = DocumentDataSchema.parse(parsed);
+
+    // Build complete DocumentData object
+    const result = {
+      id: crypto.randomUUID(),
+      filename: filePath.split('/').pop() || 'unknown',
+      extractedAt: new Date(),
+      ...validated,
+    };
+
+    emitLog(onStream, 'info', `Extraction successful: ${result.type}`, {
+      filePath,
+      type: result.type,
+      itemCount: result.items?.length ?? 0,
+    });
+
+    return result;
+  } catch (error) {
+    // Retry once on validation failure (without streaming for retry)
+    if (retryCount === 0 && error instanceof z.ZodError) {
+      emitLog(onStream, 'warn', 'Validation failed, retrying extraction', {
+        filePath,
+        errors: error.errors.map((e) => ({ path: e.path.join('.'), message: e.message })),
+      });
+      return extractWithOllama(filePath, base64, config, 1, undefined);
+    }
+    emitLog(onStream, 'error', 'Extraction failed', { filePath, error: String(error) });
+    throw error;
+  }
+}
diff --git a/packages/extract/src/schemas.ts b/packages/extract/src/schemas.ts
new file mode 100644
index 0000000..d45bec0
--- /dev/null
+++ b/packages/extract/src/schemas.ts
@@ -0,0 +1,101 @@
+import { z } from 'zod';
+
+/**
+ * Zod schema for line items in a document (receipts, invoices)
+ * Handles model variations in field names
+ */
+// Coerce to a finite number; null, '' and unparseable values become undefined instead of failing
+const safeNumber = z
+  .preprocess((v) => (v === null || v === '' ? undefined : v), z.coerce.number().finite().optional())
+  // z.coerce.number() rejects NaN before any transform runs, so recover with catch() instead
+  .catch(undefined);
+
+export const LineItemSchema = z
+  .object({
+    // Description and the alternative keys some models emit
+    description: z.string().optional(),
+    name: z.string().optional(), // alias: "name"
+    item: z.string().optional(), // alias: "item"
+    // Quantity
+    quantity: safeNumber,
+    qty: safeNumber, // alias: "qty"
+    // Unit price and line-total aliases
+    unitPrice: safeNumber,
+    unit_price: safeNumber,
+    price: safeNumber,
+    total: safeNumber,
+    amount: safeNumber,
+  })
+  .transform((raw) => {
+    const description = raw.description || raw.name || raw.item || 'Unknown item';
+    const quantity = raw.quantity ?? raw.qty;
+    const unitPrice = raw.unitPrice ?? raw.unit_price;
+    return { description, quantity, unitPrice, total: raw.total ?? raw.price ?? raw.amount };
+  });
+
+/**
+ * Zod schema for extracted document data
+ * Lenient to handle model variations (null vs undefined, missing fields)
+ */
+/**
+ * Normalize a date string to ISO format (YYYY-MM-DD) without timezone drift.
+ * Handles ISO-prefixed strings, US M/D/YY(YY), then anything Date can parse; otherwise undefined.
+ */
+function normalizeDate(dateStr: string | null | undefined): string | undefined {
+  if (!dateStr) return undefined;
+  const pad = (n: number, width = 2): string => String(n).padStart(width, '0');
+  // Format calendar parts, rejecting impossible dates (e.g. 02/30) that Date would roll over
+  const build = (year: number, month: number, day: number): string | undefined => {
+    const probe = new Date(Date.UTC(year, month - 1, day));
+    if (probe.getUTCFullYear() !== year || probe.getUTCMonth() !== month - 1) return undefined;
+    return `${pad(year, 4)}-${pad(month)}-${pad(day)}`;
+  };
+  const text = dateStr.trim();
+  // ISO date, optionally followed by a time: keep the calendar date exactly as written
+  const iso = /^(\d{4})-(\d{2})-(\d{2})(?!\d)/.exec(text);
+  if (iso) return build(Number(iso[1]), Number(iso[2]), Number(iso[3]));
+
+  // MM/DD/YY or MM/DD/YYYY (common in US receipts); two-digit years are read as 20YY
+  const mdy = /^(\d{1,2})\/(\d{1,2})\/(\d{4}|\d{2})(?!\d)/.exec(text);
+  if (mdy) return build(Number(mdy[3]) + (mdy[3].length === 2 ? 2000 : 0), Number(mdy[1]), Number(mdy[2]));
+
+  // Other formats parse as local time: read local fields (toISOString() would shift to UTC)
+  const parsed = new Date(text);
+  if (Number.isNaN(parsed.getTime())) return undefined;
+  return build(parsed.getFullYear(), parsed.getMonth() + 1, parsed.getDate());
+}
+
+export const DocumentDataSchema = z
+  .object({
+    // Unknown or missing document types collapse to 'other'
+    type: z.enum(['invoice', 'receipt', 'bank_statement', 'other']).default('other').catch('other'),
+    // Vendor, plus the alternative keys some models emit
+    vendor: z.string().nullish(),
+    store_name: z.string().nullish(), // alias: "store_name"
+    merchant: z.string().nullish(), // alias: "merchant"
+    business_name: z.string().nullish(), // alias: "business_name"
+    // Amount, plus root-level alternatives
+    amount: z.coerce.number().nullish(),
+    total: z.coerce.number().nullish(), // alias: "total"
+    total_amount: z.coerce.number().nullish(), // alias: "total_amount"
+    // Date as written in the document
+    date: z.string().nullish(),
+    // Line items and optional raw text
+    items: z.array(LineItemSchema).nullish(),
+    rawText: z.string().nullish(),
+  })
+  .transform(({ type, date, items, rawText, ...aliases }) => {
+    const dateRaw = date ?? undefined;
+    return {
+      type,
+      vendor: aliases.vendor ?? aliases.store_name ?? aliases.merchant ?? aliases.business_name ?? undefined,
+      amount: aliases.amount ?? aliases.total ?? aliases.total_amount ?? undefined,
+      date: normalizeDate(dateRaw), // Normalized ISO date
+      dateRaw, // Original from document
+      items: items ?? undefined,
+      rawText: rawText ?? undefined,
+    };
+  });
+
+export type ValidatedDocumentData = z.infer<typeof DocumentDataSchema>;
+
diff --git a/packages/extract/src/types.ts b/packages/extract/src/types.ts
new file mode 100644
index 0000000..99e5496
--- /dev/null
+++ b/packages/extract/src/types.ts
@@ -0,0 +1,27 @@
+/**
+ * Streaming types for extraction progress feedback
+ */
+
+export type LogLevel = 'debug' | 'info' | 'warn' | 'error';
+
+export type StreamChunk =
+  | { type: 'prompt'; content: string }
+  | { type: 'response'; content: string }
+  | { type: 'log'; level: LogLevel; message: string; data?: Record<string, unknown> };
+
+export type StreamCallback = (chunk: StreamChunk) => void;
+
+export interface ExtractOptions {
+  onStream?: StreamCallback;
+}
+
+/**
+ * OCR progress callback for multi-page processing
+ */
+export type OcrProgressCallback = (
+  page: number,
+  totalPages: number,
+  progress: number,
+  status: string
+) => void;
+
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 7af768c..2e04cc1 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -117,6 +117,9 @@ importers:
       '@google/generative-ai':
         specifier: ^0.24.1
         version: 0.24.1
+      '@lytics/kero':
+        specifier: ^1.0.0
+        version: 1.0.0
       pdf-to-img:
         specifier: ^5.0.0
         version: 5.0.0
@@ -2684,7 +2687,6 @@ packages:
 
   vectordb@0.21.2:
     resolution: {integrity: sha512-5tiwUq0jDtfIpcr7NY+kNCTecHCzSq0AqQtMzJphH7z6H6gfrw9t5/Aoy5/QnS0uAWIgqvCbE5qneQOFGxE+Og==}
-    cpu: [x64, arm64]
     os: [darwin, linux, win32]
     deprecated: Use @lancedb/lancedb instead.
     peerDependencies:
diff --git a/vitest.config.ts b/vitest.config.ts
index 7991075..7e5b849 100644
--- a/vitest.config.ts
+++ b/vitest.config.ts
@@ -14,10 +14,13 @@ export default defineConfig({
         '**/dist/**',
         '**/*.d.ts',
         '**/test/**',
-        '**/contexts/index.ts', // Barrel file
-        '**/hooks/index.ts', // Barrel file
-        '**/services/index.ts', // Barrel file
-        '**/components/index.ts', // Barrel file
+        // Barrel exports (re-export only, no logic)
+        '**/contexts/index.ts',
+        '**/hooks/index.ts',
+        '**/services/index.ts',
+        '**/components/index.ts',
+        '**/providers/index.ts',
+        'packages/extract/src/index.ts',
       ],
     },
   },

From c5e837c4bce7a094f511f1d1b5d7acbfd8b38650 Mon Sep 17 00:00:00 2001
From: prosdev <pros.seng@contentstack.com>
Date: Mon, 8 Dec 2025 10:40:47 -0800
Subject: [PATCH 2/5] refactor(cli): remove Ink, use ora/prompts/chalk

BREAKING: Removed React-based Ink UI in favor of simpler CLI tools.

Removed:
- components/: ExtractApp, OllamaStatus, StreamingOutput, etc.
- hooks/: useOllama, useExtraction
- contexts/: OllamaContext, ExtractionContext
- Dependencies: ink, ink-spinner, react, @types/react

Added:
- commands/extract.ts: New extraction command with ora spinners
- Dependencies: ora, prompts, chalk, @lytics/kero

Benefits:
- Simpler architecture (no React overhead)
- Kero logging visible in terminal (was swallowed by Ink)
- LOG_LEVEL=debug support for debugging
- Smaller bundle size (14KB vs 30KB)
---
 packages/cli/package.json                     |  11 +-
 packages/cli/src/cli.ts                       |  25 +-
 packages/cli/src/commands/extract.ts          | 258 +++++++++++++++
 packages/cli/src/components/ConfirmInput.tsx  |  58 ----
 packages/cli/src/components/ErrorDisplay.tsx  |  32 --
 packages/cli/src/components/ExtractApp.tsx    |  91 -----
 .../cli/src/components/ExtractionProgress.tsx |  81 -----
 packages/cli/src/components/OllamaStatus.tsx  | 219 ------------
 packages/cli/src/components/Result.tsx        |  59 ----
 .../cli/src/components/StreamingOutput.tsx    |  23 --
 .../__tests__/OllamaStatus.test.tsx           |  92 ------
 packages/cli/src/components/index.ts          |   7 -
 .../cli/src/contexts/ExtractionContext.tsx    |  39 ---
 packages/cli/src/contexts/OllamaContext.tsx   |  43 ---
 packages/cli/src/contexts/index.ts            |  12 -
 .../src/hooks/__tests__/useExtraction.test.ts | 247 --------------
 .../cli/src/hooks/__tests__/useOllama.test.ts | 311 ------------------
 packages/cli/src/hooks/index.ts               |   6 -
 packages/cli/src/hooks/useExtraction.ts       | 121 -------
 packages/cli/src/hooks/useOllama.ts           | 138 --------
 packages/cli/tsup.config.ts                   |   3 +
 21 files changed, 270 insertions(+), 1606 deletions(-)
 create mode 100644 packages/cli/src/commands/extract.ts
 delete mode 100644 packages/cli/src/components/ConfirmInput.tsx
 delete mode 100644 packages/cli/src/components/ErrorDisplay.tsx
 delete mode 100644 packages/cli/src/components/ExtractApp.tsx
 delete mode 100644 packages/cli/src/components/ExtractionProgress.tsx
 delete mode 100644 packages/cli/src/components/OllamaStatus.tsx
 delete mode 100644 packages/cli/src/components/Result.tsx
 delete mode 100644 packages/cli/src/components/StreamingOutput.tsx
 delete mode 100644 packages/cli/src/components/__tests__/OllamaStatus.test.tsx
 delete mode 100644 packages/cli/src/components/index.ts
 delete mode 100644 packages/cli/src/contexts/ExtractionContext.tsx
 delete mode 100644 packages/cli/src/contexts/OllamaContext.tsx
 delete mode 100644 packages/cli/src/contexts/index.ts
 delete mode 100644 packages/cli/src/hooks/__tests__/useExtraction.test.ts
 delete mode 100644 packages/cli/src/hooks/__tests__/useOllama.test.ts
 delete mode 100644 packages/cli/src/hooks/index.ts
 delete mode 100644 packages/cli/src/hooks/useExtraction.ts
 delete mode 100644 packages/cli/src/hooks/useOllama.ts

diff --git a/packages/cli/package.json b/packages/cli/package.json
index da1c438..85826cc 100644
--- a/packages/cli/package.json
+++ b/packages/cli/package.json
@@ -20,23 +20,18 @@
     "@doc-agent/storage": "workspace:*",
     "@doc-agent/vector-store": "workspace:*",
     "@google/generative-ai": "^0.24.1",
+    "@lytics/kero": "^1.0.0",
     "@modelcontextprotocol/sdk": "^1.24.3",
     "chalk": "^5.6.2",
     "commander": "^14.0.2",
-    "ink": "^6.5.1",
-    "ink-spinner": "^5.0.0",
     "ora": "^9.0.0",
-    "react": "^19.2.1",
+    "prompts": "^2.4.2",
     "vectordb": "^0.21.2",
     "zod": "^3.23.8"
   },
   "devDependencies": {
-    "@testing-library/dom": "^10.4.1",
-    "@testing-library/react": "^16.3.0",
     "@types/node": "^24.10.1",
-    "@types/react": "^19.2.7",
-    "ink-testing-library": "^4.0.0",
-    "jsdom": "^27.2.0",
+    "@types/prompts": "^2.4.9",
     "tsx": "^4.21.0",
     "typescript": "^5.9.3"
   }
diff --git a/packages/cli/src/cli.ts b/packages/cli/src/cli.ts
index 7e6feed..cccb503 100644
--- a/packages/cli/src/cli.ts
+++ b/packages/cli/src/cli.ts
@@ -2,9 +2,7 @@
 import { resolve } from 'node:path';
 import chalk from 'chalk';
 import { Command } from 'commander';
-import { render } from 'ink';
-import React from 'react';
-import { ExtractApp } from './components/ExtractApp';
+import { runExtract } from './commands/extract';
 
 // Resolve paths relative to where user ran the command
 // INIT_CWD is set by pnpm to original working directory
@@ -38,22 +36,11 @@ program
     };
     const model = options.model || defaultModels[options.provider] || 'llama3.2-vision';
 
-    const { waitUntilExit } = render(
-      React.createElement(ExtractApp, {
-        file: absolutePath,
-        provider: options.provider,
-        model,
-        dryRun: options.dryRun,
-        onComplete: () => {
-          // Normal exit
-        },
-        onError: () => {
-          process.exitCode = 1;
-        },
-      })
-    );
-
-    await waitUntilExit();
+    await runExtract(absolutePath, {
+      provider: options.provider,
+      model,
+      dryRun: options.dryRun,
+    });
   });
 
 program
diff --git a/packages/cli/src/commands/extract.ts b/packages/cli/src/commands/extract.ts
new file mode 100644
index 0000000..748caf2
--- /dev/null
+++ b/packages/cli/src/commands/extract.ts
@@ -0,0 +1,258 @@
+import { resolve } from 'node:path';
+import type { Config, DocumentData } from '@doc-agent/core';
+import { extractDocument, type StreamChunk } from '@doc-agent/extract';
+import { storage } from '@doc-agent/storage';
+import kero from '@lytics/kero';
+import chalk from 'chalk';
+import ora from 'ora';
+import prompts from 'prompts';
+import {
+  checkModelExists,
+  checkOllamaInstalled,
+  checkOllamaRunning,
+  installOllama,
+  pullModel,
+  startOllama,
+  waitForOllama,
+  type PullProgress,
+} from '../services/ollama';
+
+const LOG_LEVELS = ['trace', 'debug', 'info', 'warn', 'error', 'fatal'] as const; // accepted LOG_LEVEL values
+const envLogLevel = LOG_LEVELS.find((level) => level === process.env.LOG_LEVEL); // undefined if unset/unknown
+const logger = kero.createLogger({ level: envLogLevel ?? 'info' }); // module logger; falls back to 'info'
+
+export interface ExtractOptions {
+  provider: 'gemini' | 'openai' | 'ollama'; // copied into Config.aiProvider; 'ollama' triggers the readiness checks
+  model: string; // becomes geminiModel or ollamaModel for that provider; not forwarded for 'openai'
+  dryRun: boolean; // when true, runExtract skips storage.saveDocument and only prints the result
+}
+
+/**
+ * Render a byte count with a binary unit: whole bytes below 1024, else one decimal of KB/MB/GB.
+ */
+function formatBytes(bytes: number): string {
+  if (bytes < 1024) return `${bytes} B`;
+  const units = ['KB', 'MB', 'GB'] as const;
+  const tier = bytes < 1024 ** 2 ? 0 : bytes < 1024 ** 3 ? 1 : 2;
+  return `${(bytes / 1024 ** (tier + 1)).toFixed(1)} ${units[tier]}`;
+}
+
+/**
+ * Ensure Ollama is installed, running and has `model` pulled; false if a step is declined or fails.
+ */
+async function ensureOllamaReady(model: string): Promise<boolean> {
+  const spinner = ora();
+
+  // Step 1: is Ollama installed? If not, ask before installing via Homebrew (declining aborts)
+  spinner.start('Checking Ollama installation...');
+  const isInstalled = await checkOllamaInstalled();
+
+  if (!isInstalled) {
+    spinner.stop();
+    logger.info('Ollama not installed');
+
+    const { install } = await prompts({
+      type: 'confirm',
+      name: 'install',
+      message: 'Ollama is not installed. Install via Homebrew?',
+      initial: true,
+    });
+
+    if (!install) {
+      console.log(chalk.yellow('Please install Ollama manually: https://ollama.com/download'));
+      return false;
+    }
+
+    spinner.start('Installing Ollama via Homebrew...');
+    try {
+      await installOllama((msg) => {
+        spinner.text = msg;
+      });
+      spinner.succeed('Ollama installed');
+      logger.info('Ollama installed successfully');
+    } catch (error) {
+      spinner.fail('Failed to install Ollama');
+      logger.error({ error: String(error) }, 'Ollama installation failed');
+      console.error(chalk.red(String(error)));
+      return false;
+    }
+  } else {
+    spinner.succeed('Ollama installed');
+  }
+
+  // Step 2: is the Ollama server running? If not, ask before starting it (declining aborts)
+  spinner.start('Checking Ollama server...');
+  let isRunning = await checkOllamaRunning();
+
+  if (!isRunning) {
+    spinner.stop();
+    logger.info('Ollama server not running');
+
+    const { start } = await prompts({
+      type: 'confirm',
+      name: 'start',
+      message: 'Ollama server is not running. Start it?',
+      initial: true,
+    });
+
+    if (!start) {
+      console.log(chalk.yellow('Please start Ollama: ollama serve'));
+      return false;
+    }
+
+    spinner.start('Starting Ollama server...');
+    startOllama(); // not awaited; waitForOllama below decides whether startup succeeded
+
+    isRunning = await waitForOllama(15000); // 15000: presumably a timeout in ms — confirm in ../services/ollama
+    if (!isRunning) {
+      spinner.fail('Failed to start Ollama server');
+      logger.error('Ollama server failed to start');
+      return false;
+    }
+    spinner.succeed('Ollama server started');
+    logger.info('Ollama server started');
+  } else {
+    spinner.succeed('Ollama server running');
+  }
+
+  // Step 3: is the model available? If not, pull it without prompting and mirror progress in the spinner
+  spinner.start(`Checking model: ${model}...`);
+  const modelExists = await checkModelExists(model);
+
+  if (!modelExists) {
+    spinner.text = `Pulling model: ${model}...`;
+    logger.info({ model }, 'Pulling model');
+
+    try {
+      await pullModel(model, (progress: PullProgress) => {
+        if (progress.total && progress.completed) { // percentage only once both counters are non-zero
+          const pct = Math.round((progress.completed / progress.total) * 100);
+          spinner.text = `Pulling ${model}: ${pct}% (${formatBytes(progress.completed)}/${formatBytes(progress.total)})`;
+        } else if (progress.status) {
+          spinner.text = `${model}: ${progress.status}`;
+        }
+      });
+      spinner.succeed(`Model ready: ${model}`);
+      logger.info({ model }, 'Model pulled successfully');
+    } catch (error) {
+      spinner.fail(`Failed to pull model: ${model}`);
+      logger.error({ model, error: String(error) }, 'Model pull failed');
+      return false; // error goes to the logger only (no console.error as in the install path)
+    }
+  } else {
+    spinner.succeed(`Model ready: ${model}`);
+  }
+
+  return true;
+}
+
+/**
+ * Run document extraction: prepare Ollama if needed, extract, optionally save, print JSON.
+ * Extraction and save failures are reported on the console and via `process.exitCode = 1`.
+ */
+export async function runExtract(file: string, options: ExtractOptions): Promise<void> {
+  const { provider, model, dryRun } = options;
+  const absolutePath = resolve(file);
+
+  logger.info({ file: absolutePath, provider, model, dryRun }, 'Starting extraction');
+
+  // For Ollama, ensure everything is ready
+  if (provider === 'ollama') {
+    const ready = await ensureOllamaReady(model);
+    if (!ready) {
+      process.exitCode = 1;
+      return;
+    }
+  }
+
+  // Build config
+  const config: Config = {
+    aiProvider: provider,
+    geminiApiKey: process.env.GEMINI_API_KEY,
+    geminiModel: provider === 'gemini' ? model : undefined,
+    openaiApiKey: process.env.OPENAI_API_KEY,
+    ollamaModel: provider === 'ollama' ? model : undefined,
+  };
+
+  // Run extraction
+  const spinner = ora('Extracting document data...').start();
+  let lastPrompt = '';
+  let responseBuffer = '';
+
+  try {
+    const result = await extractDocument(absolutePath, config, {
+      onStream: (chunk: StreamChunk) => {
+        if (!chunk) return;
+        if (chunk.type === 'log') {
+          // Log via kero - use simple string logging to avoid issues
+          const msg = `${chunk.message}${chunk.data ? ' ' + JSON.stringify(chunk.data) : ''}`;
+          if (chunk.level === 'error') {
+            logger.error(msg);
+          } else if (chunk.level === 'warn') {
+            logger.warn(msg);
+          } else if (chunk.level === 'debug') {
+            logger.debug(msg);
+          } else {
+            logger.info(msg);
+          }
+          // Update spinner for info logs
+          if (chunk.level === 'info') {
+            spinner.text = chunk.message;
+          }
+        } else if (chunk.type === 'prompt') {
+          lastPrompt = chunk.content;
+          // Show OCR progress in spinner
+          if (chunk.content.includes('OCR')) {
+            spinner.text = chunk.content.split('\n')[0];
+          }
+          // Log full prompt at debug level (only for system/user prompts, not OCR progress)
+          if (chunk.content.includes('System:') || chunk.content.includes('User:')) {
+            logger.debug(`Prompt to model:\n${chunk.content}`);
+          }
+        } else if (chunk.type === 'response') {
+          responseBuffer += chunk.content;
+          // Show that we're receiving response
+          spinner.text = `Receiving response... (${responseBuffer.length} chars)`;
+        }
+      },
+    });
+
+    spinner.succeed('Extraction complete');
+    logger.info({ type: result.type, itemCount: result.items?.length ?? 0 }, 'Extraction successful');
+
+    // Save to database (unless dry run)
+    if (!dryRun) {
+      const saveSpinner = ora('Saving to database...').start();
+      try {
+        await storage.saveDocument(result, absolutePath);
+        saveSpinner.succeed(`Saved: ${result.filename} (ID: ${result.id})`);
+        logger.info({ id: result.id, filename: result.filename }, 'Document saved');
+      } catch (error) {
+        saveSpinner.fail('Failed to save');
+        logger.error({ error: String(error) }, 'Save failed');
+        // Extraction already succeeded, so report the save error here instead of rethrowing
+        // into the outer catch, which would add a misleading "Extraction failed" and prompt dump.
+        console.error(chalk.red(String(error)));
+        process.exitCode = 1;
+        return;
+      }
+    } else {
+      console.log(chalk.gray('(dry run - not saved to database)'));
+    }
+
+    // Print result
+    console.log(chalk.gray('─'.repeat(40)));
+    console.log(JSON.stringify(result, null, 2));
+  } catch (error) {
+    spinner.fail('Extraction failed');
+    logger.error({ error: String(error) }, 'Extraction failed');
+    // Show the prompt for debugging if available
+    if (lastPrompt) {
+      console.log(chalk.gray('\n─── Last Prompt ───'));
+      console.log(chalk.gray(lastPrompt.slice(-500))); // Last 500 chars
+    }
+    console.error(chalk.red(String(error)));
+    process.exitCode = 1;
+  }
+}
+
diff --git a/packages/cli/src/components/ConfirmInput.tsx b/packages/cli/src/components/ConfirmInput.tsx
deleted file mode 100644
index 5e9d7fb..0000000
--- a/packages/cli/src/components/ConfirmInput.tsx
+++ /dev/null
@@ -1,58 +0,0 @@
-import { Box, Text, useInput } from 'ink';
-
-interface ConfirmInputInteractiveProps {
-  message: string;
-  onConfirm: (confirmed: boolean) => void;
-  defaultValue: boolean;
-}
-
-function ConfirmInputInteractive({
-  message,
-  onConfirm,
-  defaultValue,
-}: ConfirmInputInteractiveProps) {
-  useInput((input, key) => {
-    if (input.toLowerCase() === 'y' || (key.return && defaultValue)) {
-      onConfirm(true);
-    } else if (input.toLowerCase() === 'n' || (key.return && !defaultValue)) {
-      onConfirm(false);
-    }
-  });
-
-  return (
-    <Box>
-      <Text>{message} </Text>
-      <Text color="gray">{defaultValue ? '[Y/n]' : '[y/N]'}</Text>
-    </Box>
-  );
-}
-
-interface ConfirmInputProps {
-  message: string;
-  onConfirm: (confirmed: boolean) => void;
-  defaultValue?: boolean;
-  /** Whether stdin supports raw mode (interactive input) */
-  isInteractive: boolean;
-}
-
-export function ConfirmInput({
-  message,
-  onConfirm,
-  defaultValue = true,
-  isInteractive,
-}: ConfirmInputProps) {
-  // Non-interactive: just show message, caller handles auto-confirm
-  if (!isInteractive) {
-    return (
-      <Box>
-        <Text>{message} </Text>
-        <Text color="gray">(auto: {defaultValue ? 'yes' : 'no'})</Text>
-      </Box>
-    );
-  }
-
-  // Interactive mode with useInput
-  return (
-    <ConfirmInputInteractive message={message} onConfirm={onConfirm} defaultValue={defaultValue} />
-  );
-}
diff --git a/packages/cli/src/components/ErrorDisplay.tsx b/packages/cli/src/components/ErrorDisplay.tsx
deleted file mode 100644
index b32dc1d..0000000
--- a/packages/cli/src/components/ErrorDisplay.tsx
+++ /dev/null
@@ -1,32 +0,0 @@
-import { Box, Text } from 'ink';
-
-interface ErrorDisplayProps {
-  title: string;
-  message: string;
-  suggestions?: string[];
-}
-
-export function ErrorDisplay({ title, message, suggestions }: ErrorDisplayProps) {
-  return (
-    <Box flexDirection="column" marginTop={1}>
-      <Box>
-        <Text color="red" bold>
-          ✗ {title}
-        </Text>
-      </Box>
-      <Box marginLeft={2}>
-        <Text color="gray">{message}</Text>
-      </Box>
-      {suggestions && suggestions.length > 0 && (
-        <Box flexDirection="column" marginTop={1} marginLeft={2}>
-          <Text color="yellow">Suggestions:</Text>
-          {suggestions.map((suggestion) => (
-            <Text key={suggestion} color="gray">
-              • {suggestion}
-            </Text>
-          ))}
-        </Box>
-      )}
-    </Box>
-  );
-}
diff --git a/packages/cli/src/components/ExtractApp.tsx b/packages/cli/src/components/ExtractApp.tsx
deleted file mode 100644
index c816a7f..0000000
--- a/packages/cli/src/components/ExtractApp.tsx
+++ /dev/null
@@ -1,91 +0,0 @@
-import { Box, useStdin } from 'ink';
-import {
-  ExtractionProvider,
-  type ExtractionService,
-  OllamaProvider,
-  type OllamaService,
-} from '../contexts';
-import { useExtraction } from '../hooks/useExtraction';
-import { useOllama } from '../hooks/useOllama';
-import { ExtractionProgress } from './ExtractionProgress';
-import { OllamaStatus } from './OllamaStatus';
-import { Result } from './Result';
-import { StreamingOutput } from './StreamingOutput';
-
-export interface ExtractAppProps {
-  file: string;
-  provider: 'gemini' | 'openai' | 'ollama';
-  model: string;
-  dryRun: boolean;
-  onComplete: () => void;
-  onError: (error: Error) => void;
-  // Optional services for testing
-  ollamaService?: OllamaService;
-  extractionService?: ExtractionService;
-}
-
-function ExtractAppInner({
-  file,
-  provider,
-  model,
-  dryRun,
-  onComplete,
-  onError,
-}: Omit<ExtractAppProps, 'ollamaService' | 'extractionService'>) {
-  const { isRawModeSupported } = useStdin();
-
-  const ollama = useOllama({
-    provider,
-    model,
-    isInteractive: isRawModeSupported,
-  });
-
-  const extraction = useExtraction({
-    file,
-    provider,
-    model,
-    dryRun,
-    shouldStart: ollama.isReady,
-    onComplete,
-    onError,
-  });
-
-  return (
-    <Box flexDirection="column" padding={1}>
-      <OllamaStatus
-        state={ollama.state}
-        isInteractive={isRawModeSupported}
-        onInstallConfirm={
-          ollama.state.status === 'prompt-install' ? ollama.handleInstallConfirm : undefined
-        }
-        onStartConfirm={
-          ollama.state.status === 'prompt-start' ? ollama.handleStartConfirm : undefined
-        }
-      />
-
-      {ollama.state.status === 'ready' && (
-        <Box marginTop={1}>
-          <ExtractionProgress state={extraction.state} dryRun={dryRun} />
-        </Box>
-      )}
-
-      {extraction.state.status === 'extracting' && extraction.responseContent && (
-        <StreamingOutput content={extraction.responseContent} />
-      )}
-
-      {extraction.result && extraction.state.status === 'complete' && (
-        <Result data={extraction.result} showJson={true} />
-      )}
-    </Box>
-  );
-}
-
-export function ExtractApp({ ollamaService, extractionService, ...props }: ExtractAppProps) {
-  return (
-    <OllamaProvider service={ollamaService}>
-      <ExtractionProvider service={extractionService}>
-        <ExtractAppInner {...props} />
-      </ExtractionProvider>
-    </OllamaProvider>
-  );
-}
diff --git a/packages/cli/src/components/ExtractionProgress.tsx b/packages/cli/src/components/ExtractionProgress.tsx
deleted file mode 100644
index f06767c..0000000
--- a/packages/cli/src/components/ExtractionProgress.tsx
+++ /dev/null
@@ -1,81 +0,0 @@
-import { Box, Text } from 'ink';
-import Spinner from 'ink-spinner';
-import { useEffect, useState } from 'react';
-
-export type ExtractionState =
-  | { status: 'idle' }
-  | { status: 'extracting'; startTime: number }
-  | { status: 'saving' }
-  | { status: 'complete'; id: string; filename: string }
-  | { status: 'error'; message: string };
-
-interface ExtractionProgressProps {
-  state: ExtractionState;
-  dryRun?: boolean;
-}
-
-export function ExtractionProgress({ state, dryRun }: ExtractionProgressProps) {
-  const [elapsed, setElapsed] = useState(0);
-
-  useEffect(() => {
-    if (state.status !== 'extracting') {
-      setElapsed(0);
-      return;
-    }
-
-    const interval = setInterval(() => {
-      setElapsed(Math.floor((Date.now() - state.startTime) / 1000));
-    }, 1000);
-
-    return () => clearInterval(interval);
-  }, [state]);
-
-  switch (state.status) {
-    case 'idle':
-      return null;
-
-    case 'extracting':
-      return (
-        <Box>
-          <Text color="cyan">
-            <Spinner type="dots" />
-          </Text>
-          <Text> Extracting document data...</Text>
-          {elapsed > 10 && <Text color="gray"> ({elapsed}s - Local AI can take a moment)</Text>}
-        </Box>
-      );
-
-    case 'saving':
-      return (
-        <Box>
-          <Text color="cyan">
-            <Spinner type="dots" />
-          </Text>
-          <Text> Saving to database...</Text>
-        </Box>
-      );
-
-    case 'complete':
-      return (
-        <Box>
-          <Text color="green">✓</Text>
-          {dryRun ? (
-            <Text> Extraction complete (dry run)</Text>
-          ) : (
-            <Text>
-              {' '}
-              Saved: {state.filename} (ID: {state.id})
-            </Text>
-          )}
-        </Box>
-      );
-
-    case 'error':
-      return (
-        <Box>
-          <Text color="red">✗</Text>
-          <Text> Extraction failed: {state.message}</Text>
-        </Box>
-      );
-  }
-}
diff --git a/packages/cli/src/components/OllamaStatus.tsx b/packages/cli/src/components/OllamaStatus.tsx
deleted file mode 100644
index 7db95ac..0000000
--- a/packages/cli/src/components/OllamaStatus.tsx
+++ /dev/null
@@ -1,219 +0,0 @@
-import { Box, Text } from 'ink';
-import Spinner from 'ink-spinner';
-import { ConfirmInput } from './ConfirmInput';
-
-export interface PullProgress {
-  status: string;
-  completed?: number;
-  total?: number;
-}
-
-export type OllamaState =
-  | { status: 'checking' }
-  | { status: 'not-installed' }
-  | { status: 'prompt-install' }
-  | { status: 'installing'; progress?: string }
-  | { status: 'not-running' }
-  | { status: 'prompt-start' }
-  | { status: 'starting' }
-  | { status: 'checking-model'; model: string }
-  | { status: 'pulling-model'; model: string; pullProgress?: PullProgress }
-  | { status: 'ready'; model: string }
-  | { status: 'error'; message: string }
-  | { status: 'cancelled' };
-
-interface OllamaStatusProps {
-  state: OllamaState;
-  isInteractive: boolean;
-  onInstallConfirm?: (confirmed: boolean) => void;
-  onStartConfirm?: (confirmed: boolean) => void;
-}
-
-export function OllamaStatus({
-  state,
-  isInteractive,
-  onInstallConfirm,
-  onStartConfirm,
-}: OllamaStatusProps) {
-  switch (state.status) {
-    case 'checking':
-      return (
-        <Box>
-          <Text color="cyan">
-            <Spinner type="dots" />
-          </Text>
-          <Text> Checking Ollama...</Text>
-        </Box>
-      );
-
-    case 'not-installed':
-      return (
-        <Box>
-          <Text color="yellow">⚠</Text>
-          <Text> Ollama is not installed</Text>
-        </Box>
-      );
-
-    case 'prompt-install':
-      return (
-        <Box flexDirection="column">
-          <Box>
-            <Text color="yellow">⚠</Text>
-            <Text> Ollama is not installed</Text>
-          </Box>
-          <Box marginTop={1}>
-            {onInstallConfirm && (
-              <ConfirmInput
-                message="Install Ollama now?"
-                onConfirm={onInstallConfirm}
-                defaultValue={true}
-                isInteractive={isInteractive}
-              />
-            )}
-          </Box>
-          <Box marginLeft={2} marginTop={1}>
-            <Text color="gray">
-              (Uses official installer: curl -fsSL https://ollama.com/install.sh | sh)
-            </Text>
-          </Box>
-        </Box>
-      );
-
-    case 'installing':
-      return (
-        <Box>
-          <Text color="cyan">
-            <Spinner type="dots" />
-          </Text>
-          <Text> {state.progress || 'Installing Ollama...'}</Text>
-        </Box>
-      );
-
-    case 'not-running':
-      return (
-        <Box>
-          <Text color="yellow">⚠</Text>
-          <Text> Ollama is installed but not running</Text>
-        </Box>
-      );
-
-    case 'prompt-start':
-      return (
-        <Box flexDirection="column">
-          <Box>
-            <Text color="yellow">⚠</Text>
-            <Text> Ollama is installed but not running</Text>
-          </Box>
-          <Box marginTop={1}>
-            {onStartConfirm && (
-              <ConfirmInput
-                message="Start Ollama now?"
-                onConfirm={onStartConfirm}
-                defaultValue={true}
-                isInteractive={isInteractive}
-              />
-            )}
-          </Box>
-        </Box>
-      );
-
-    case 'starting':
-      return (
-        <Box>
-          <Text color="cyan">
-            <Spinner type="dots" />
-          </Text>
-          <Text> Starting Ollama...</Text>
-        </Box>
-      );
-
-    case 'checking-model':
-      return (
-        <Box>
-          <Text color="cyan">
-            <Spinner type="dots" />
-          </Text>
-          <Text> Checking model: {state.model}...</Text>
-        </Box>
-      );
-
-    case 'pulling-model': {
-      const { pullProgress } = state;
-      const completed = pullProgress?.completed ?? 0;
-      const total = pullProgress?.total ?? 0;
-      const hasProgress = total > 0 && completed > 0;
-      const percent = hasProgress ? Math.round((completed / total) * 100) : 0;
-      const barWidth = 20;
-      const filled = hasProgress ? Math.round((percent / 100) * barWidth) : 0;
-      const bar = hasProgress ? '█'.repeat(filled) + '░'.repeat(barWidth - filled) : '';
-
-      // Format bytes
-      const formatBytes = (bytes: number) => {
-        if (bytes >= 1e9) return `${(bytes / 1e9).toFixed(1)} GB`;
-        if (bytes >= 1e6) return `${(bytes / 1e6).toFixed(0)} MB`;
-        return `${bytes} B`;
-      };
-
-      return (
-        <Box flexDirection="column">
-          <Box>
-            <Text color="cyan">↓</Text>
-            <Text> Pulling {state.model}</Text>
-          </Box>
-          {hasProgress ? (
-            <Box marginLeft={2}>
-              <Text color="cyan">{bar}</Text>
-              <Text color="gray">
-                {' '}
-                {percent}% ({formatBytes(completed)} / {formatBytes(total)})
-              </Text>
-            </Box>
-          ) : (
-            <Box marginLeft={2}>
-              <Text color="cyan">
-                <Spinner type="dots" />
-              </Text>
-              <Text color="gray"> {pullProgress?.status || 'Connecting...'}</Text>
-            </Box>
-          )}
-        </Box>
-      );
-    }
-
-    case 'ready':
-      return (
-        <Box>
-          <Text color="green">✓</Text>
-          <Text> Ollama ready: {state.model}</Text>
-        </Box>
-      );
-
-    case 'error':
-      return (
-        <Box flexDirection="column">
-          <Box>
-            <Text color="red">✗</Text>
-            <Text> Ollama error: {state.message}</Text>
-          </Box>
-        </Box>
-      );
-
-    case 'cancelled':
-      return (
-        <Box flexDirection="column">
-          <Box>
-            <Text color="gray">─</Text>
-            <Text> Cancelled</Text>
-          </Box>
-          <Box marginLeft={2} marginTop={1} flexDirection="column">
-            <Text color="gray">To install manually:</Text>
-            <Text color="cyan"> https://ollama.com/download</Text>
-            <Box marginTop={1}>
-              <Text color="gray">Or use cloud AI:</Text>
-            </Box>
-            <Text color="white"> doc extract file.pdf --provider gemini</Text>
-          </Box>
-        </Box>
-      );
-  }
-}
diff --git a/packages/cli/src/components/Result.tsx b/packages/cli/src/components/Result.tsx
deleted file mode 100644
index 20064c8..0000000
--- a/packages/cli/src/components/Result.tsx
+++ /dev/null
@@ -1,59 +0,0 @@
-import type { DocumentData } from '@doc-agent/core';
-import { Box, Text } from 'ink';
-
-interface ResultProps {
-  data: DocumentData;
-  showJson?: boolean;
-}
-
-export function Result({ data, showJson = true }: ResultProps) {
-  if (showJson) {
-    return (
-      <Box flexDirection="column" marginTop={1}>
-        <Text color="gray">─────────────────────────────────────</Text>
-        <Text>{JSON.stringify(data, null, 2)}</Text>
-      </Box>
-    );
-  }
-
-  return (
-    <Box flexDirection="column" marginTop={1}>
-      <Text color="gray">─────────────────────────────────────</Text>
-      <Box>
-        <Text color="gray">Type: </Text>
-        <Text>{data.type}</Text>
-      </Box>
-      {data.vendor && (
-        <Box>
-          <Text color="gray">Vendor: </Text>
-          <Text>{data.vendor}</Text>
-        </Box>
-      )}
-      {data.amount !== undefined && (
-        <Box>
-          <Text color="gray">Amount: </Text>
-          <Text color="green">${data.amount.toFixed(2)}</Text>
-        </Box>
-      )}
-      {data.date && (
-        <Box>
-          <Text color="gray">Date: </Text>
-          <Text>{data.date}</Text>
-        </Box>
-      )}
-      {data.items && data.items.length > 0 && (
-        <Box flexDirection="column" marginTop={1}>
-          <Text color="gray">Items:</Text>
-          {data.items.map((item) => (
-            <Box key={item.description} marginLeft={2}>
-              <Text>
-                • {item.description}
-                {item.total !== undefined && <Text color="green"> ${item.total.toFixed(2)}</Text>}
-              </Text>
-            </Box>
-          ))}
-        </Box>
-      )}
-    </Box>
-  );
-}
diff --git a/packages/cli/src/components/StreamingOutput.tsx b/packages/cli/src/components/StreamingOutput.tsx
deleted file mode 100644
index fca1aab..0000000
--- a/packages/cli/src/components/StreamingOutput.tsx
+++ /dev/null
@@ -1,23 +0,0 @@
-import { Box, Text } from 'ink';
-
-interface StreamingOutputProps {
-  content: string;
-  maxLines?: number;
-}
-
-export function StreamingOutput({ content, maxLines = 10 }: StreamingOutputProps) {
-  if (!content) return null;
-
-  // Show last N lines of content to keep it readable
-  const lines = content.split('\n');
-  const displayLines = lines.slice(-maxLines);
-  const truncated = lines.length > maxLines;
-
-  return (
-    <Box flexDirection="column" marginTop={1}>
-      <Text color="gray">─── Response ───</Text>
-      {truncated && <Text color="gray">...</Text>}
-      <Text>{displayLines.join('\n')}</Text>
-    </Box>
-  );
-}
diff --git a/packages/cli/src/components/__tests__/OllamaStatus.test.tsx b/packages/cli/src/components/__tests__/OllamaStatus.test.tsx
deleted file mode 100644
index 85dc0a8..0000000
--- a/packages/cli/src/components/__tests__/OllamaStatus.test.tsx
+++ /dev/null
@@ -1,92 +0,0 @@
-import { render } from 'ink-testing-library';
-import React from 'react';
-import { describe, expect, it, vi } from 'vitest';
-import { type OllamaState, OllamaStatus } from '../OllamaStatus';
-
-describe('OllamaStatus', () => {
-  const defaultProps = {
-    isInteractive: true,
-    onInstallConfirm: undefined,
-    onStartConfirm: undefined,
-  };
-
-  it('should render checking state', () => {
-    const state: OllamaState = { status: 'checking' };
-    const { lastFrame } = render(React.createElement(OllamaStatus, { ...defaultProps, state }));
-
-    expect(lastFrame()).toContain('Checking Ollama');
-  });
-
-  it('should render ready state with model name', () => {
-    const state: OllamaState = { status: 'ready', model: 'llama3.2-vision' };
-    const { lastFrame } = render(React.createElement(OllamaStatus, { ...defaultProps, state }));
-
-    expect(lastFrame()).toContain('Ollama ready');
-    expect(lastFrame()).toContain('llama3.2-vision');
-  });
-
-  it('should render pulling-model state', () => {
-    const state: OllamaState = { status: 'pulling-model', model: 'test-model' };
-    const { lastFrame } = render(React.createElement(OllamaStatus, { ...defaultProps, state }));
-
-    expect(lastFrame()).toContain('Pulling test-model');
-  });
-
-  it('should render pulling-model state with progress', () => {
-    const state: OllamaState = {
-      status: 'pulling-model',
-      model: 'test-model',
-      pullProgress: { status: 'pulling', completed: 500000000, total: 1000000000 },
-    };
-    const { lastFrame } = render(React.createElement(OllamaStatus, { ...defaultProps, state }));
-
-    expect(lastFrame()).toContain('Pulling test-model');
-    expect(lastFrame()).toContain('50%');
-    expect(lastFrame()).toContain('500.0 MB');
-  });
-
-  it('should render error state', () => {
-    const state: OllamaState = { status: 'error', message: 'Connection failed' };
-    const { lastFrame } = render(React.createElement(OllamaStatus, { ...defaultProps, state }));
-
-    expect(lastFrame()).toContain('error');
-    expect(lastFrame()).toContain('Connection failed');
-  });
-
-  it('should render cancelled state', () => {
-    const state: OllamaState = { status: 'cancelled' };
-    const { lastFrame } = render(React.createElement(OllamaStatus, { ...defaultProps, state }));
-
-    expect(lastFrame()).toContain('Cancelled');
-  });
-
-  it('should render prompt-install state', () => {
-    const state: OllamaState = { status: 'prompt-install' };
-    const onInstallConfirm = vi.fn();
-    const { lastFrame } = render(
-      React.createElement(OllamaStatus, {
-        ...defaultProps,
-        state,
-        onInstallConfirm,
-      })
-    );
-
-    expect(lastFrame()).toContain('Ollama is not installed');
-    expect(lastFrame()).toContain('Install Ollama');
-  });
-
-  it('should render prompt-start state', () => {
-    const state: OllamaState = { status: 'prompt-start' };
-    const onStartConfirm = vi.fn();
-    const { lastFrame } = render(
-      React.createElement(OllamaStatus, {
-        ...defaultProps,
-        state,
-        onStartConfirm,
-      })
-    );
-
-    expect(lastFrame()).toContain('not running');
-    expect(lastFrame()).toContain('Start Ollama');
-  });
-});
diff --git a/packages/cli/src/components/index.ts b/packages/cli/src/components/index.ts
deleted file mode 100644
index 03ab998..0000000
--- a/packages/cli/src/components/index.ts
+++ /dev/null
@@ -1,7 +0,0 @@
-export { ConfirmInput } from './ConfirmInput';
-export { ErrorDisplay } from './ErrorDisplay';
-export { ExtractApp } from './ExtractApp';
-export { ExtractionProgress, type ExtractionState } from './ExtractionProgress';
-export { type OllamaState, OllamaStatus } from './OllamaStatus';
-export { Result } from './Result';
-export { StreamingOutput } from './StreamingOutput';
diff --git a/packages/cli/src/contexts/ExtractionContext.tsx b/packages/cli/src/contexts/ExtractionContext.tsx
deleted file mode 100644
index d893d96..0000000
--- a/packages/cli/src/contexts/ExtractionContext.tsx
+++ /dev/null
@@ -1,39 +0,0 @@
-import type { Config, DocumentData } from '@doc-agent/core';
-import { type ExtractOptions, extractDocument } from '@doc-agent/extract';
-import { storage } from '@doc-agent/storage';
-import { createContext, type ReactNode, useContext } from 'react';
-
-// Service interface for dependency injection
-export interface ExtractionService {
-  extractDocument: (
-    filePath: string,
-    config: Config,
-    options?: ExtractOptions
-  ) => Promise<DocumentData>;
-  saveDocument: (doc: DocumentData, filePath: string) => Promise<void>;
-}
-
-// Default implementation uses real services
-const defaultExtractionService: ExtractionService = {
-  extractDocument,
-  saveDocument: storage.saveDocument.bind(storage),
-};
-
-const ExtractionContext = createContext<ExtractionService>(defaultExtractionService);
-
-export interface ExtractionProviderProps {
-  children: ReactNode;
-  service?: ExtractionService;
-}
-
-export function ExtractionProvider({ children, service }: ExtractionProviderProps) {
-  return (
-    <ExtractionContext.Provider value={service ?? defaultExtractionService}>
-      {children}
-    </ExtractionContext.Provider>
-  );
-}
-
-export function useExtractionService(): ExtractionService {
-  return useContext(ExtractionContext);
-}
diff --git a/packages/cli/src/contexts/OllamaContext.tsx b/packages/cli/src/contexts/OllamaContext.tsx
deleted file mode 100644
index 7826e7a..0000000
--- a/packages/cli/src/contexts/OllamaContext.tsx
+++ /dev/null
@@ -1,43 +0,0 @@
-import { createContext, type ReactNode, useContext } from 'react';
-import * as ollamaService from '../services/ollama';
-
-// Service interface for dependency injection
-export interface OllamaService {
-  checkOllamaInstalled: typeof ollamaService.checkOllamaInstalled;
-  checkOllamaRunning: typeof ollamaService.checkOllamaRunning;
-  installOllama: typeof ollamaService.installOllama;
-  startOllama: typeof ollamaService.startOllama;
-  waitForOllama: typeof ollamaService.waitForOllama;
-  checkModelExists: typeof ollamaService.checkModelExists;
-  pullModel: typeof ollamaService.pullModel;
-}
-
-// Default implementation uses real service
-const defaultOllamaService: OllamaService = {
-  checkOllamaInstalled: ollamaService.checkOllamaInstalled,
-  checkOllamaRunning: ollamaService.checkOllamaRunning,
-  installOllama: ollamaService.installOllama,
-  startOllama: ollamaService.startOllama,
-  waitForOllama: ollamaService.waitForOllama,
-  checkModelExists: ollamaService.checkModelExists,
-  pullModel: ollamaService.pullModel,
-};
-
-const OllamaContext = createContext<OllamaService>(defaultOllamaService);
-
-export interface OllamaProviderProps {
-  children: ReactNode;
-  service?: OllamaService;
-}
-
-export function OllamaProvider({ children, service }: OllamaProviderProps) {
-  return (
-    <OllamaContext.Provider value={service ?? defaultOllamaService}>
-      {children}
-    </OllamaContext.Provider>
-  );
-}
-
-export function useOllamaService(): OllamaService {
-  return useContext(OllamaContext);
-}
diff --git a/packages/cli/src/contexts/index.ts b/packages/cli/src/contexts/index.ts
deleted file mode 100644
index 1379339..0000000
--- a/packages/cli/src/contexts/index.ts
+++ /dev/null
@@ -1,12 +0,0 @@
-export {
-  ExtractionProvider,
-  type ExtractionProviderProps,
-  type ExtractionService,
-  useExtractionService,
-} from './ExtractionContext';
-export {
-  OllamaProvider,
-  type OllamaProviderProps,
-  type OllamaService,
-  useOllamaService,
-} from './OllamaContext';
diff --git a/packages/cli/src/hooks/__tests__/useExtraction.test.ts b/packages/cli/src/hooks/__tests__/useExtraction.test.ts
deleted file mode 100644
index d1b5991..0000000
--- a/packages/cli/src/hooks/__tests__/useExtraction.test.ts
+++ /dev/null
@@ -1,247 +0,0 @@
-/**
- * @vitest-environment jsdom
- */
-import type { DocumentData } from '@doc-agent/core';
-import { renderHook, waitFor } from '@testing-library/react';
-import React, { type ReactNode } from 'react';
-import { beforeEach, describe, expect, it, vi } from 'vitest';
-import { ExtractionProvider, type ExtractionService } from '../../contexts';
-import { useExtraction } from '../useExtraction';
-
-// Create mock extraction service
-const createMockExtractionService = (
-  overrides: Partial<ExtractionService> = {}
-): ExtractionService => ({
-  extractDocument: vi.fn().mockResolvedValue({
-    id: 'test-id',
-    filename: 'test.pdf',
-    extractedAt: new Date(),
-    type: 'receipt',
-    vendor: 'Test Vendor',
-    amount: 100,
-  } as DocumentData),
-  saveDocument: vi.fn().mockResolvedValue(undefined),
-  ...overrides,
-});
-
-// Wrapper component for providing context
-const createWrapper = (service: ExtractionService) => {
-  return function Wrapper({ children }: { children: ReactNode }) {
-    return React.createElement(ExtractionProvider, { service }, children);
-  };
-};
-
-describe('useExtraction', () => {
-  beforeEach(() => {
-    vi.clearAllMocks();
-  });
-
-  it('should start in idle state', () => {
-    const mockService = createMockExtractionService();
-
-    const { result } = renderHook(
-      () =>
-        useExtraction({
-          file: '/path/to/test.pdf',
-          provider: 'ollama',
-          model: 'llama3.2-vision',
-          dryRun: false,
-          shouldStart: false, // Don't start extraction
-          onComplete: vi.fn(),
-          onError: vi.fn(),
-        }),
-      { wrapper: createWrapper(mockService) }
-    );
-
-    expect(result.current.state.status).toBe('idle');
-    expect(result.current.result).toBeNull();
-  });
-
-  it('should extract and save when shouldStart is true', async () => {
-    const mockService = createMockExtractionService();
-    const onComplete = vi.fn();
-    const onError = vi.fn();
-
-    const { result } = renderHook(
-      () =>
-        useExtraction({
-          file: '/path/to/test.pdf',
-          provider: 'ollama',
-          model: 'llama3.2-vision',
-          dryRun: false,
-          shouldStart: true,
-          onComplete,
-          onError,
-        }),
-      { wrapper: createWrapper(mockService) }
-    );
-
-    await waitFor(() => {
-      expect(result.current.state.status).toBe('complete');
-    });
-
-    expect(mockService.extractDocument).toHaveBeenCalled();
-    expect(mockService.saveDocument).toHaveBeenCalled();
-    expect(onComplete).toHaveBeenCalled();
-    expect(onError).not.toHaveBeenCalled();
-    expect(result.current.result).not.toBeNull();
-    expect(result.current.result?.id).toBe('test-id');
-  });
-
-  it('should skip saving in dry run mode', async () => {
-    const mockService = createMockExtractionService();
-    const onComplete = vi.fn();
-
-    const { result } = renderHook(
-      () =>
-        useExtraction({
-          file: '/path/to/test.pdf',
-          provider: 'ollama',
-          model: 'llama3.2-vision',
-          dryRun: true, // Dry run - skip save
-          shouldStart: true,
-          onComplete,
-          onError: vi.fn(),
-        }),
-      { wrapper: createWrapper(mockService) }
-    );
-
-    await waitFor(() => {
-      expect(result.current.state.status).toBe('complete');
-    });
-
-    expect(mockService.extractDocument).toHaveBeenCalled();
-    expect(mockService.saveDocument).not.toHaveBeenCalled(); // Should not save
-    expect(onComplete).toHaveBeenCalled();
-  });
-
-  it('should handle extraction errors', async () => {
-    const mockService = createMockExtractionService({
-      extractDocument: vi.fn().mockRejectedValue(new Error('Extraction failed')),
-    });
-    const onError = vi.fn();
-
-    const { result } = renderHook(
-      () =>
-        useExtraction({
-          file: '/path/to/test.pdf',
-          provider: 'ollama',
-          model: 'llama3.2-vision',
-          dryRun: false,
-          shouldStart: true,
-          onComplete: vi.fn(),
-          onError,
-        }),
-      { wrapper: createWrapper(mockService) }
-    );
-
-    await waitFor(() => {
-      expect(result.current.state.status).toBe('error');
-    });
-
-    expect(result.current.state).toEqual({
-      status: 'error',
-      message: 'Extraction failed',
-    });
-    expect(onError).toHaveBeenCalled();
-  });
-
-  it('should handle save errors', async () => {
-    const mockService = createMockExtractionService({
-      saveDocument: vi.fn().mockRejectedValue(new Error('Save failed')),
-    });
-    const onError = vi.fn();
-
-    const { result } = renderHook(
-      () =>
-        useExtraction({
-          file: '/path/to/test.pdf',
-          provider: 'ollama',
-          model: 'llama3.2-vision',
-          dryRun: false,
-          shouldStart: true,
-          onComplete: vi.fn(),
-          onError,
-        }),
-      { wrapper: createWrapper(mockService) }
-    );
-
-    await waitFor(() => {
-      expect(result.current.state.status).toBe('error');
-    });
-
-    expect(result.current.state).toEqual({
-      status: 'error',
-      message: 'Save failed',
-    });
-    expect(onError).toHaveBeenCalled();
-  });
-
-  it('should pass correct config for gemini provider', async () => {
-    const mockService = createMockExtractionService();
-
-    renderHook(
-      () =>
-        useExtraction({
-          file: '/path/to/test.pdf',
-          provider: 'gemini',
-          model: 'gemini-2.5-flash',
-          dryRun: true,
-          shouldStart: true,
-          onComplete: vi.fn(),
-          onError: vi.fn(),
-        }),
-      { wrapper: createWrapper(mockService) }
-    );
-
-    await waitFor(() => {
-      expect(mockService.extractDocument).toHaveBeenCalled();
-    });
-
-    const callArgs = (mockService.extractDocument as ReturnType<typeof vi.fn>).mock.calls[0];
-    expect(callArgs[1].aiProvider).toBe('gemini');
-    expect(callArgs[1].geminiModel).toBe('gemini-2.5-flash');
-  });
-
-  it('should handle streaming callbacks', async () => {
-    const mockExtract = vi.fn().mockImplementation(async (_file, _config, options) => {
-      // Simulate streaming
-      options?.onStream?.({ type: 'prompt', content: 'System prompt...' });
-      options?.onStream?.({ type: 'response', content: '{"type": "receipt"}' });
-
-      return {
-        id: 'test-id',
-        filename: 'test.pdf',
-        extractedAt: new Date(),
-        type: 'receipt',
-      } as DocumentData;
-    });
-
-    const mockService = createMockExtractionService({
-      extractDocument: mockExtract,
-    });
-
-    const { result } = renderHook(
-      () =>
-        useExtraction({
-          file: '/path/to/test.pdf',
-          provider: 'ollama',
-          model: 'llama3.2-vision',
-          dryRun: true,
-          shouldStart: true,
-          onComplete: vi.fn(),
-          onError: vi.fn(),
-        }),
-      { wrapper: createWrapper(mockService) }
-    );
-
-    await waitFor(() => {
-      expect(result.current.state.status).toBe('complete');
-    });
-
-    // Streaming content should be captured
-    expect(result.current.promptContent).toBe('System prompt...');
-    // Response content may be throttled, just check it was called
-    expect(mockExtract).toHaveBeenCalled();
-  });
-});
diff --git a/packages/cli/src/hooks/__tests__/useOllama.test.ts b/packages/cli/src/hooks/__tests__/useOllama.test.ts
deleted file mode 100644
index 67e4a19..0000000
--- a/packages/cli/src/hooks/__tests__/useOllama.test.ts
+++ /dev/null
@@ -1,311 +0,0 @@
-/**
- * @vitest-environment jsdom
- */
-import { renderHook, waitFor } from '@testing-library/react';
-import React, { type ReactNode } from 'react';
-import { beforeEach, describe, expect, it, vi } from 'vitest';
-import { OllamaProvider, type OllamaService } from '../../contexts';
-import { useOllama } from '../useOllama';
-
-// Create mock service
-const createMockOllamaService = (overrides: Partial<OllamaService> = {}): OllamaService => ({
-  checkOllamaInstalled: vi.fn().mockResolvedValue(true),
-  checkOllamaRunning: vi.fn().mockResolvedValue(true),
-  installOllama: vi.fn().mockResolvedValue(undefined),
-  startOllama: vi.fn(),
-  waitForOllama: vi.fn().mockResolvedValue(true),
-  checkModelExists: vi.fn().mockResolvedValue(true),
-  pullModel: vi.fn().mockResolvedValue(undefined),
-  ...overrides,
-});
-
-// Wrapper component for providing context
-const createWrapper = (service: OllamaService) => {
-  return function Wrapper({ children }: { children: ReactNode }) {
-    return React.createElement(OllamaProvider, { service }, children);
-  };
-};
-
-describe('useOllama', () => {
-  beforeEach(() => {
-    vi.clearAllMocks();
-  });
-
-  it('should skip Ollama check for non-Ollama providers', async () => {
-    const mockService = createMockOllamaService();
-
-    const { result } = renderHook(
-      () =>
-        useOllama({
-          provider: 'gemini',
-          model: 'gemini-2.5-flash',
-          isInteractive: false,
-        }),
-      { wrapper: createWrapper(mockService) }
-    );
-
-    await waitFor(() => {
-      expect(result.current.state.status).toBe('ready');
-    });
-
-    expect(result.current.isReady).toBe(true);
-    expect(mockService.checkOllamaRunning).not.toHaveBeenCalled();
-  });
-
-  it('should check if Ollama is running for Ollama provider', async () => {
-    const mockService = createMockOllamaService();
-
-    const { result } = renderHook(
-      () =>
-        useOllama({
-          provider: 'ollama',
-          model: 'llama3.2-vision',
-          isInteractive: false,
-        }),
-      { wrapper: createWrapper(mockService) }
-    );
-
-    await waitFor(() => {
-      expect(result.current.state.status).toBe('ready');
-    });
-
-    expect(mockService.checkOllamaRunning).toHaveBeenCalled();
-    expect(mockService.checkModelExists).toHaveBeenCalledWith('llama3.2-vision');
-  });
-
-  it('should prompt for install when Ollama is not installed', async () => {
-    const mockService = createMockOllamaService({
-      checkOllamaRunning: vi.fn().mockResolvedValue(false),
-      checkOllamaInstalled: vi.fn().mockResolvedValue(false),
-    });
-
-    const { result } = renderHook(
-      () =>
-        useOllama({
-          provider: 'ollama',
-          model: 'llama3.2-vision',
-          isInteractive: true, // Interactive mode - will prompt
-        }),
-      { wrapper: createWrapper(mockService) }
-    );
-
-    await waitFor(() => {
-      expect(result.current.state.status).toBe('prompt-install');
-    });
-
-    expect(result.current.isReady).toBe(false);
-  });
-
-  it('should prompt to start when Ollama is installed but not running', async () => {
-    const mockService = createMockOllamaService({
-      checkOllamaRunning: vi.fn().mockResolvedValue(false),
-      checkOllamaInstalled: vi.fn().mockResolvedValue(true),
-    });
-
-    const { result } = renderHook(
-      () =>
-        useOllama({
-          provider: 'ollama',
-          model: 'llama3.2-vision',
-          isInteractive: true,
-        }),
-      { wrapper: createWrapper(mockService) }
-    );
-
-    await waitFor(() => {
-      expect(result.current.state.status).toBe('prompt-start');
-    });
-  });
-
-  it('should pull model when it does not exist', async () => {
-    const mockService = createMockOllamaService({
-      checkModelExists: vi.fn().mockResolvedValue(false),
-    });
-
-    const { result } = renderHook(
-      () =>
-        useOllama({
-          provider: 'ollama',
-          model: 'new-model',
-          isInteractive: false,
-        }),
-      { wrapper: createWrapper(mockService) }
-    );
-
-    await waitFor(() => {
-      expect(result.current.state.status).toBe('ready');
-    });
-
-    expect(mockService.pullModel).toHaveBeenCalledWith('new-model', expect.any(Function));
-  });
-
-  it('should handle install confirmation decline', async () => {
-    const mockService = createMockOllamaService({
-      checkOllamaRunning: vi.fn().mockResolvedValue(false),
-      checkOllamaInstalled: vi.fn().mockResolvedValue(false),
-    });
-
-    const { result } = renderHook(
-      () =>
-        useOllama({
-          provider: 'ollama',
-          model: 'llama3.2-vision',
-          isInteractive: true,
-        }),
-      { wrapper: createWrapper(mockService) }
-    );
-
-    await waitFor(() => {
-      expect(result.current.state.status).toBe('prompt-install');
-    });
-
-    // Decline installation
-    await result.current.handleInstallConfirm(false);
-
-    await waitFor(() => {
-      expect(result.current.state.status).toBe('cancelled');
-    });
-  });
-
-  it('should handle start confirmation and proceed to model check', async () => {
-    const mockService = createMockOllamaService({
-      checkOllamaRunning: vi.fn().mockResolvedValue(false),
-      checkOllamaInstalled: vi.fn().mockResolvedValue(true),
-    });
-
-    const { result } = renderHook(
-      () =>
-        useOllama({
-          provider: 'ollama',
-          model: 'llama3.2-vision',
-          isInteractive: true,
-        }),
-      { wrapper: createWrapper(mockService) }
-    );
-
-    await waitFor(() => {
-      expect(result.current.state.status).toBe('prompt-start');
-    });
-
-    // Accept start
-    await result.current.handleStartConfirm(true);
-
-    await waitFor(() => {
-      expect(result.current.state.status).toBe('ready');
-    });
-
-    expect(mockService.startOllama).toHaveBeenCalled();
-    expect(mockService.waitForOllama).toHaveBeenCalled();
-  });
-
-  it('should handle start confirmation decline', async () => {
-    const mockService = createMockOllamaService({
-      checkOllamaRunning: vi.fn().mockResolvedValue(false),
-      checkOllamaInstalled: vi.fn().mockResolvedValue(true),
-    });
-
-    const { result } = renderHook(
-      () =>
-        useOllama({
-          provider: 'ollama',
-          model: 'llama3.2-vision',
-          isInteractive: true,
-        }),
-      { wrapper: createWrapper(mockService) }
-    );
-
-    await waitFor(() => {
-      expect(result.current.state.status).toBe('prompt-start');
-    });
-
-    // Decline start
-    await result.current.handleStartConfirm(false);
-
-    await waitFor(() => {
-      expect(result.current.state.status).toBe('cancelled');
-    });
-  });
-
-  it('should show error when Ollama fails to start', async () => {
-    const mockService = createMockOllamaService({
-      checkOllamaRunning: vi.fn().mockResolvedValue(false),
-      checkOllamaInstalled: vi.fn().mockResolvedValue(true),
-      waitForOllama: vi.fn().mockResolvedValue(false), // Fails to start
-    });
-
-    const { result } = renderHook(
-      () =>
-        useOllama({
-          provider: 'ollama',
-          model: 'llama3.2-vision',
-          isInteractive: true,
-        }),
-      { wrapper: createWrapper(mockService) }
-    );
-
-    await waitFor(() => {
-      expect(result.current.state.status).toBe('prompt-start');
-    });
-
-    await result.current.handleStartConfirm(true);
-
-    await waitFor(() => {
-      expect(result.current.state.status).toBe('error');
-    });
-
-    expect(result.current.state).toEqual({
-      status: 'error',
-      message: 'Ollama failed to start within 10 seconds',
-    });
-  });
-
-  it('should auto-confirm install in non-interactive mode', async () => {
-    const mockService = createMockOllamaService({
-      checkOllamaRunning: vi.fn().mockResolvedValue(false),
-      checkOllamaInstalled: vi.fn().mockResolvedValue(false),
-    });
-
-    renderHook(
-      () =>
-        useOllama({
-          provider: 'ollama',
-          model: 'llama3.2-vision',
-          isInteractive: false, // Non-interactive
-        }),
-      { wrapper: createWrapper(mockService) }
-    );
-
-    // Should auto-confirm and proceed to install
-    await waitFor(
-      () => {
-        expect(mockService.installOllama).toHaveBeenCalled();
-      },
-      { timeout: 2000 }
-    );
-  });
-
-  it('should auto-confirm start in non-interactive mode', async () => {
-    const mockService = createMockOllamaService({
-      checkOllamaRunning: vi.fn().mockResolvedValue(false),
-      checkOllamaInstalled: vi.fn().mockResolvedValue(true),
-    });
-
-    renderHook(
-      () =>
-        useOllama({
-          provider: 'ollama',
-          model: 'llama3.2-vision',
-          isInteractive: false, // Non-interactive
-        }),
-      { wrapper: createWrapper(mockService) }
-    );
-
-    // Should auto-confirm and proceed to start
-    await waitFor(
-      () => {
-        expect(mockService.startOllama).toHaveBeenCalled();
-      },
-      { timeout: 2000 }
-    );
-  });
-});
diff --git a/packages/cli/src/hooks/index.ts b/packages/cli/src/hooks/index.ts
deleted file mode 100644
index f88ba64..0000000
--- a/packages/cli/src/hooks/index.ts
+++ /dev/null
@@ -1,6 +0,0 @@
-export {
-  type UseExtractionOptions,
-  type UseExtractionResult,
-  useExtraction,
-} from './useExtraction';
-export { type UseOllamaOptions, type UseOllamaResult, useOllama } from './useOllama';
diff --git a/packages/cli/src/hooks/useExtraction.ts b/packages/cli/src/hooks/useExtraction.ts
deleted file mode 100644
index 12e2956..0000000
--- a/packages/cli/src/hooks/useExtraction.ts
+++ /dev/null
@@ -1,121 +0,0 @@
-import { resolve } from 'node:path';
-import type { Config, DocumentData } from '@doc-agent/core';
-import type { StreamChunk } from '@doc-agent/extract';
-import { useCallback, useEffect, useRef, useState } from 'react';
-import type { ExtractionState } from '../components/ExtractionProgress';
-import { useExtractionService } from '../contexts/ExtractionContext';
-
-export interface UseExtractionOptions {
-  file: string;
-  provider: 'gemini' | 'openai' | 'ollama';
-  model: string;
-  dryRun: boolean;
-  shouldStart: boolean;
-  onComplete: () => void;
-  onError: (error: Error) => void;
-}
-
-export interface UseExtractionResult {
-  state: ExtractionState;
-  result: DocumentData | null;
-  promptContent: string;
-  responseContent: string;
-}
-
-export function useExtraction({
-  file,
-  provider,
-  model,
-  dryRun,
-  shouldStart,
-  onComplete,
-  onError,
-}: UseExtractionOptions): UseExtractionResult {
-  const [state, setState] = useState<ExtractionState>({ status: 'idle' });
-  const [result, setResult] = useState<DocumentData | null>(null);
-  const [promptContent, setPromptContent] = useState('');
-  const [responseContent, setResponseContent] = useState('');
-  const responseRef = useRef('');
-  const lastUpdateRef = useRef(0);
-  const updateTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);
-  const extractionService = useExtractionService();
-
-  const runExtraction = useCallback(async () => {
-    try {
-      setState({ status: 'extracting', startTime: Date.now() });
-      setPromptContent('');
-      responseRef.current = '';
-      setResponseContent('');
-
-      const config: Config = {
-        aiProvider: provider,
-        geminiApiKey: process.env.GEMINI_API_KEY,
-        geminiModel: provider === 'gemini' ? model : undefined,
-        openaiApiKey: process.env.OPENAI_API_KEY,
-        ollamaModel: provider === 'ollama' ? model : undefined,
-      };
-
-      const THROTTLE_MS = 250;
-
-      const extractedData = await extractionService.extractDocument(file, config, {
-        onStream: (chunk: StreamChunk) => {
-          if (chunk.type === 'prompt') {
-            setPromptContent(chunk.content);
-          } else if (chunk.type === 'response') {
-            responseRef.current += chunk.content;
-
-            const now = Date.now();
-            if (now - lastUpdateRef.current >= THROTTLE_MS) {
-              lastUpdateRef.current = now;
-              setResponseContent(responseRef.current);
-            } else if (!updateTimerRef.current) {
-              updateTimerRef.current = setTimeout(() => {
-                updateTimerRef.current = null;
-                lastUpdateRef.current = Date.now();
-                setResponseContent(responseRef.current);
-              }, THROTTLE_MS);
-            }
-          }
-        },
-      });
-
-      if (updateTimerRef.current) {
-        clearTimeout(updateTimerRef.current);
-        updateTimerRef.current = null;
-      }
-      setResponseContent(responseRef.current);
-      setResult(extractedData);
-
-      if (!dryRun) {
-        setState({ status: 'saving' });
-        const absolutePath = resolve(file);
-        await extractionService.saveDocument(extractedData, absolutePath);
-      }
-
-      setState({
-        status: 'complete',
-        id: extractedData.id,
-        filename: extractedData.filename,
-      });
-
-      onComplete();
-    } catch (error) {
-      const err = error as Error;
-      setState({ status: 'error', message: err.message });
-      onError(err);
-    }
-  }, [file, provider, model, dryRun, onComplete, onError, extractionService]);
-
-  useEffect(() => {
-    if (shouldStart && state.status === 'idle') {
-      runExtraction();
-    }
-  }, [shouldStart, state.status, runExtraction]);
-
-  return {
-    state,
-    result,
-    promptContent,
-    responseContent,
-  };
-}
diff --git a/packages/cli/src/hooks/useOllama.ts b/packages/cli/src/hooks/useOllama.ts
deleted file mode 100644
index 8574faf..0000000
--- a/packages/cli/src/hooks/useOllama.ts
+++ /dev/null
@@ -1,138 +0,0 @@
-import { useCallback, useEffect, useState } from 'react';
-import type { OllamaState, PullProgress } from '../components/OllamaStatus';
-import { useOllamaService } from '../contexts/OllamaContext';
-
-export interface UseOllamaOptions {
-  provider: 'gemini' | 'openai' | 'ollama';
-  model: string;
-  isInteractive: boolean;
-}
-
-export interface UseOllamaResult {
-  state: OllamaState;
-  isReady: boolean;
-  handleInstallConfirm: (confirmed: boolean) => Promise<void>;
-  handleStartConfirm: (confirmed: boolean) => Promise<void>;
-}
-
-export function useOllama({ provider, model, isInteractive }: UseOllamaOptions): UseOllamaResult {
-  const [state, setState] = useState<OllamaState>({ status: 'checking' });
-  const [isReady, setIsReady] = useState(false);
-  const ollamaService = useOllamaService();
-
-  // Check model and pull if needed (declared first since other callbacks depend on it)
-  const checkAndPullModel = useCallback(async () => {
-    setState({ status: 'checking-model', model });
-    const modelExists = await ollamaService.checkModelExists(model);
-
-    if (!modelExists) {
-      setState({ status: 'pulling-model', model });
-      await ollamaService.pullModel(model, (progress: PullProgress) => {
-        setState({ status: 'pulling-model', model, pullProgress: progress });
-      });
-    }
-
-    setState({ status: 'ready', model });
-    setIsReady(true);
-  }, [model, ollamaService]);
-
-  // Handle install confirmation
-  const handleInstallConfirm = useCallback(
-    async (confirmed: boolean) => {
-      if (!confirmed) {
-        setState({ status: 'cancelled' });
-        return;
-      }
-
-      setState({ status: 'installing' });
-      try {
-        await ollamaService.installOllama((progress) => {
-          setState({ status: 'installing', progress });
-        });
-        setState({ status: 'prompt-start' });
-      } catch (err) {
-        setState({ status: 'error', message: (err as Error).message });
-      }
-    },
-    [ollamaService]
-  );
-
-  // Handle start confirmation
-  const handleStartConfirm = useCallback(
-    async (confirmed: boolean) => {
-      if (!confirmed) {
-        setState({ status: 'cancelled' });
-        return;
-      }
-
-      setState({ status: 'starting' });
-      try {
-        ollamaService.startOllama();
-        const started = await ollamaService.waitForOllama();
-        if (!started) {
-          throw new Error('Ollama failed to start within 10 seconds');
-        }
-        // Proceed to model check
-        await checkAndPullModel();
-      } catch (err) {
-        setState({ status: 'error', message: (err as Error).message });
-      }
-    },
-    [checkAndPullModel, ollamaService]
-  );
-
-  // Auto-confirm in non-interactive mode
-  useEffect(() => {
-    if (isInteractive) return;
-
-    if (state.status === 'prompt-install') {
-      const timer = setTimeout(() => handleInstallConfirm(true), 500);
-      return () => clearTimeout(timer);
-    }
-
-    if (state.status === 'prompt-start') {
-      const timer = setTimeout(() => handleStartConfirm(true), 500);
-      return () => clearTimeout(timer);
-    }
-  }, [isInteractive, state.status, handleInstallConfirm, handleStartConfirm]);
-
-  // Initial check
-  useEffect(() => {
-    const checkOllama = async () => {
-      // Skip Ollama check for non-Ollama providers
-      if (provider !== 'ollama') {
-        setState({ status: 'ready', model: provider });
-        setIsReady(true);
-        return;
-      }
-
-      setState({ status: 'checking' });
-
-      // Check if Ollama is running
-      const isRunning = await ollamaService.checkOllamaRunning();
-      if (isRunning) {
-        await checkAndPullModel();
-        return;
-      }
-
-      // Check if Ollama is installed
-      const isInstalled = await ollamaService.checkOllamaInstalled();
-      if (!isInstalled) {
-        setState({ status: 'prompt-install' });
-        return;
-      }
-
-      // Installed but not running
-      setState({ status: 'prompt-start' });
-    };
-
-    checkOllama();
-  }, [provider, checkAndPullModel, ollamaService]);
-
-  return {
-    state,
-    isReady,
-    handleInstallConfirm,
-    handleStartConfirm,
-  };
-}
diff --git a/packages/cli/tsup.config.ts b/packages/cli/tsup.config.ts
index 0d8c282..1d73678 100644
--- a/packages/cli/tsup.config.ts
+++ b/packages/cli/tsup.config.ts
@@ -15,12 +15,15 @@ export default defineConfig({
   external: [
     '@doc-agent/core',
     '@doc-agent/extract',
+    '@doc-agent/storage',
     '@doc-agent/vector-store',
     '@google/generative-ai',
+    '@lytics/kero',
     '@modelcontextprotocol/sdk',
     'chalk',
     'commander',
     'ora',
+    'prompts',
     'vectordb',
   ],
   tsconfig: './tsconfig.json',

From 9465797b967e21d9c49a833568afe563ed234344 Mon Sep 17 00:00:00 2001
From: prosdev <pros.seng@contentstack.com>
Date: Mon, 8 Dec 2025 10:40:58 -0800
Subject: [PATCH 3/5] feat(core): add dateRaw field for original date
 preservation

- date: Normalized ISO format (YYYY-MM-DD) for querying
- dateRaw: Original date string from document for audit trail
---
 packages/core/src/index.ts | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts
index 5eaece1..c60384e 100644
--- a/packages/core/src/index.ts
+++ b/packages/core/src/index.ts
@@ -6,7 +6,8 @@ export interface DocumentData {
   extractedAt: Date;
   vendor?: string;
   amount?: number;
-  date?: string;
+  date?: string; // Normalized ISO date (YYYY-MM-DD)
+  dateRaw?: string; // Original date from document
   items?: LineItem[];
   rawText?: string;
 }

From e725ab695b5dc53ee233d7824b701b4f345a5b9a Mon Sep 17 00:00:00 2001
From: prosdev <pros.seng@contentstack.com>
Date: Mon, 8 Dec 2025 10:41:14 -0800
Subject: [PATCH 4/5] chore: update dependencies and vitest config

- vitest.config.ts: Exclude barrel exports from coverage
- packages/extract/package.json: Remove @lytics/kero dependency
- pnpm-lock.yaml: Updated lockfile
---
 packages/extract/package.json |   1 -
 pnpm-lock.yaml                | 491 +++++++---------------------------
 vitest.config.ts              |   5 +-
 3 files changed, 100 insertions(+), 397 deletions(-)

diff --git a/packages/extract/package.json b/packages/extract/package.json
index 1181fe9..bb028a9 100644
--- a/packages/extract/package.json
+++ b/packages/extract/package.json
@@ -25,7 +25,6 @@
   "dependencies": {
     "@doc-agent/core": "workspace:*",
     "@google/generative-ai": "^0.24.1",
-    "@lytics/kero": "^1.0.0",
     "pdf-to-img": "^5.0.0",
     "tesseract.js": "^6.0.1",
     "zod": "^3.23.8"
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 2e04cc1..db3c22d 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -50,6 +50,9 @@ importers:
       '@google/generative-ai':
         specifier: ^0.24.1
         version: 0.24.1
+      '@lytics/kero':
+        specifier: ^1.0.0
+        version: 1.0.0
       '@modelcontextprotocol/sdk':
         specifier: ^1.24.3
         version: 1.24.3(zod@3.25.76)
@@ -59,18 +62,12 @@ importers:
       commander:
         specifier: ^14.0.2
         version: 14.0.2
-      ink:
-        specifier: ^6.5.1
-        version: 6.5.1(@types/react@19.2.7)(react@19.2.1)
-      ink-spinner:
-        specifier: ^5.0.0
-        version: 5.0.0(ink@6.5.1(@types/react@19.2.7)(react@19.2.1))(react@19.2.1)
       ora:
         specifier: ^9.0.0
         version: 9.0.0
-      react:
-        specifier: ^19.2.1
-        version: 19.2.1
+      prompts:
+        specifier: ^2.4.2
+        version: 2.4.2
       vectordb:
         specifier: ^0.21.2
         version: 0.21.2(@apache-arrow/ts@14.0.2)(apache-arrow@14.0.2)
@@ -78,24 +75,12 @@ importers:
         specifier: ^3.23.8
         version: 3.25.76
     devDependencies:
-      '@testing-library/dom':
-        specifier: ^10.4.1
-        version: 10.4.1
-      '@testing-library/react':
-        specifier: ^16.3.0
-        version: 16.3.0(@testing-library/dom@10.4.1)(@types/react@19.2.7)(react-dom@19.2.1(react@19.2.1))(react@19.2.1)
       '@types/node':
         specifier: ^24.10.1
         version: 24.10.1
-      '@types/react':
-        specifier: ^19.2.7
-        version: 19.2.7
-      ink-testing-library:
-        specifier: ^4.0.0
-        version: 4.0.0(@types/react@19.2.7)
-      jsdom:
-        specifier: ^27.2.0
-        version: 27.2.0
+      '@types/prompts':
+        specifier: ^2.4.9
+        version: 2.4.9
       tsx:
         specifier: ^4.21.0
         version: 4.21.0
@@ -117,9 +102,6 @@ importers:
       '@google/generative-ai':
         specifier: ^0.24.1
         version: 0.24.1
-      '@lytics/kero':
-        specifier: ^1.0.0
-        version: 1.0.0
       pdf-to-img:
         specifier: ^5.0.0
         version: 5.0.0
@@ -193,10 +175,6 @@ packages:
   '@acemir/cssom@0.9.28':
     resolution: {integrity: sha512-LuS6IVEivI75vKN8S04qRD+YySP0RmU/cV8UNukhQZvprxF+76Z43TNo/a08eCodaGhT1Us8etqS1ZRY9/Or0A==}
 
-  '@alcalzone/ansi-tokenize@0.2.2':
-    resolution: {integrity: sha512-mkOh+Wwawzuf5wa30bvc4nA+Qb6DIrGWgBhRR/Pw4T9nsgYait8izvXkNyU78D6Wcu3Z+KUdwCmLCxlWjEotYA==}
-    engines: {node: '>=18'}
-
   '@apache-arrow/ts@14.0.2':
     resolution: {integrity: sha512-CtwAvLkK0CZv7xsYeCo91ml6PvlfzAmAJZkRYuz2GNBwfYufj5SVi0iuSMwIMkcU/szVwvLdzORSLa5PlF/2ug==}
 
@@ -209,10 +187,6 @@ packages:
   '@asamuzakjp/nwsapi@2.3.9':
     resolution: {integrity: sha512-n8GuYSrI9bF7FFZ/SjhwevlHc8xaVlb/7HmHelnc/PZXBD2ZR49NnN9sMMuDdEGPeeRQ5d0hqlSlEpgCX3Wl0Q==}
 
-  '@babel/code-frame@7.27.1':
-    resolution: {integrity: sha512-cjQ7ZlQ0Mv3b47hABuTevyTuYN4i+loJKGeV9flcCgIK37cCXRh+L1bd3iBHlynerhQ7BhCkn2BPbQUL+rGqFg==}
-    engines: {node: '>=6.9.0'}
-
   '@babel/helper-string-parser@7.27.1':
     resolution: {integrity: sha512-qMlSxKbpRlAridDExk92nSobyDdpPijUq2DW6oDnUqd0iOGxmQjyqhMIihI9+zv4LPyZdRje2cavWPbCbWm3eA==}
     engines: {node: '>=6.9.0'}
@@ -226,10 +200,6 @@ packages:
     engines: {node: '>=6.0.0'}
     hasBin: true
 
-  '@babel/runtime@7.28.4':
-    resolution: {integrity: sha512-Q/N6JNWvIvPnLDvjlE1OUBLPQHH6l3CltCEsHIujp45zQUSSh8K+gHnaEX45yAT1nyngnINhvWtzN+Nb9D8RAQ==}
-    engines: {node: '>=6.9.0'}
-
   '@babel/types@7.28.5':
     resolution: {integrity: sha512-qQ5m48eI/MFLQ5PxQj4PFaprjyCTLI37ElWMmNs0K8Lk3dVeOdNpB3ks8jc7yM5CDmVC73eMVk/trk3fgmrUpA==}
     engines: {node: '>=6.9.0'}
@@ -1152,31 +1122,9 @@ packages:
   '@standard-schema/spec@1.0.0':
     resolution: {integrity: sha512-m2bOd0f2RT9k8QJx1JN85cZYyH1RqFBdlwtkSlf4tBDYLCiiZnv1fIIwacK6cqwXavOydf0NPToMQgpKq+dVlA==}
 
-  '@testing-library/dom@10.4.1':
-    resolution: {integrity: sha512-o4PXJQidqJl82ckFaXUeoAW+XysPLauYI43Abki5hABd853iMhitooc6znOnczgbTYmEP6U6/y1ZyKAIsvMKGg==}
-    engines: {node: '>=18'}
-
-  '@testing-library/react@16.3.0':
-    resolution: {integrity: sha512-kFSyxiEDwv1WLl2fgsq6pPBbw5aWKrsY2/noi1Id0TK0UParSF62oFQFGHXIyaG4pp2tEub/Zlel+fjjZILDsw==}
-    engines: {node: '>=18'}
-    peerDependencies:
-      '@testing-library/dom': ^10.0.0
-      '@types/react': ^18.0.0 || ^19.0.0
-      '@types/react-dom': ^18.0.0 || ^19.0.0
-      react: ^18.0.0 || ^19.0.0
-      react-dom: ^18.0.0 || ^19.0.0
-    peerDependenciesMeta:
-      '@types/react':
-        optional: true
-      '@types/react-dom':
-        optional: true
-
   '@tsconfig/node-lts@24.0.0':
     resolution: {integrity: sha512-8mSTqWwCd6aQpvxSrpQlMoA9RiUZSs7bYhL5qsLXIIaN9HQaINeoydrRu/Y7/fws4bvfuyhs0BRnW9/NI8tySg==}
 
-  '@types/aria-query@5.0.4':
-    resolution: {integrity: sha512-rfT93uj5s0PRL7EzccGMs3brplhcrghnDoV26NqKhCAS1hVo+WdNsPvE/yb6ilfr5hi2MEk6d5EWJTKdxg8jVw==}
-
   '@types/better-sqlite3@7.6.13':
     resolution: {integrity: sha512-NMv9ASNARoKksWtsq/SHakpYAYnhBrQgGD8zkLYk/jaK8jUGn08CfEdTRgYhMypUQAfzSP8W6gNLe0q19/t4VA==}
 
@@ -1207,6 +1155,9 @@ packages:
   '@types/pad-left@2.1.1':
     resolution: {integrity: sha512-Xd22WCRBydkGSApl5Bw0PhAOHKSVjNL3E3AwzKaps96IMraPqy5BvZIsBVK6JLwdybUzjHnuWVwpDd0JjTfHXA==}
 
+  '@types/prompts@2.4.9':
+    resolution: {integrity: sha512-qTxFi6Buiu8+50/+3DGIWLHM6QuWsEKugJnnP6iv2Mc4ncxE4A/OJkjuVOA+5X0X1S/nq5VJRa8Lu+nwcvbrKA==}
+
   '@types/react@19.2.7':
     resolution: {integrity: sha512-MWtvHrGZLFttgeEj28VXHxpmwYbor/ATPYbBfSFZEIRK0ecCFLl2Qo55z52Hss+UV9CRN7trSeq1zbgx7YDWWg==}
 
@@ -1276,10 +1227,6 @@ packages:
     resolution: {integrity: sha512-g6LhBsl+GBPRWGWsBtutpzBYuIIdBkLEvad5C/va/74Db018+5TZiyA26cZJAr3Rft5lprVqOIPxf5Vid6tqAw==}
     engines: {node: '>=18'}
 
-  ansi-regex@5.0.1:
-    resolution: {integrity: sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==}
-    engines: {node: '>=8'}
-
   ansi-regex@6.2.2:
     resolution: {integrity: sha512-Bq3SmSpyFHaWjPk8If9yc6svM8c56dB5BAtW4Qbw5jHTwwXXcTLoRMkpDJp6VL0XzlWaCHTXrkFURMYmD0sLqg==}
     engines: {node: '>=12'}
@@ -1288,10 +1235,6 @@ packages:
     resolution: {integrity: sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==}
     engines: {node: '>=8'}
 
-  ansi-styles@5.2.0:
-    resolution: {integrity: sha512-Cxwpt2SfTzTtXcfOlzGEee8O+c+MmUgGrNiBcXnuWxuFJHe6a5Hz7qwhwe5OgaSYI0IJvkLqWX1ASG+cJOkEiA==}
-    engines: {node: '>=10'}
-
   ansi-styles@6.2.3:
     resolution: {integrity: sha512-4Dj6M28JB+oAH8kFkTLUo+a2jwOFkuqb3yucU0CANcRRUbxS0cP0nZYCGjcc3BNXwRIsUVmDGgzawme7zvJHvg==}
     engines: {node: '>=12'}
@@ -1303,9 +1246,6 @@ packages:
     resolution: {integrity: sha512-EBO2xJN36/XoY81nhLcwCJgFwkboDZeyNQ+OPsG7bCoQjc2BT0aTyH/MR6SrL+LirSNz+cYqjGRlupMMlP1aEg==}
     hasBin: true
 
-  aria-query@5.3.0:
-    resolution: {integrity: sha512-b0P0sZPKtyu8HkeRAfCq0IfURZK+SuwMjY1UXGBU27wpAiTwQAIlq56IbIO+ytk/JjS1fMR14ee5WBBfKi5J6A==}
-
   array-back@3.1.0:
     resolution: {integrity: sha512-TkuxA4UCOvxuDK6NZYXCalszEzj+TLszyASooky+i742l9TqsOdYCMJJupxRic61hwquNtppB3hgcuq9SVSH1Q==}
     engines: {node: '>=6'}
@@ -1324,10 +1264,6 @@ packages:
   asynckit@0.4.0:
     resolution: {integrity: sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==}
 
-  auto-bind@5.0.1:
-    resolution: {integrity: sha512-ooviqdwwgfIfNmDwo94wlshcdzfO64XV0Cg6oDsDYBJfITDz1EngD2z7DkbvCWn+XIMsIqW27sEVF6qcpJrRcg==}
-    engines: {node: ^12.20.0 || ^14.13.1 || >=16.0.0}
-
   axios@1.13.2:
     resolution: {integrity: sha512-VPk9ebNqPcy5lRGuSlKx752IlDatOjT9paPlm8A7yOuW2Fbvp4X3JznJtT4f0GzGLLiWE9W8onz51SqLYwzGaA==}
 
@@ -1408,22 +1344,10 @@ packages:
   chownr@1.1.4:
     resolution: {integrity: sha512-jJ0bqzaylmJtVnNgzTeSOs8DPavpbYgEr/b0YL8/2GO3xJEhInFmhKMUnEJQjZumK7KXGFhUy89PrsJWlakBVg==}
 
-  cli-boxes@3.0.0:
-    resolution: {integrity: sha512-/lzGpEWL/8PfI0BmBOPRwp0c/wFNX1RdUML3jK/RcSBA9T8mZDdQpqYBKtCFTOfQbwPqWEOpjqW+Fnayc0969g==}
-    engines: {node: '>=10'}
-
-  cli-cursor@4.0.0:
-    resolution: {integrity: sha512-VGtlMu3x/4DOtIUwEkRezxUZ2lBacNJCHash0N0WeZDBS+7Ux1dm3XWAgWYxLJFMMdOeXMHXorshEFhbMSGelg==}
-    engines: {node: ^12.20.0 || ^14.13.1 || >=16.0.0}
-
   cli-cursor@5.0.0:
     resolution: {integrity: sha512-aCj4O5wKyszjMmDT4tZj93kxyydN/K5zPWSCe6/0AV/AA1pqe5ZBIw0a2ZfPQV7lL5/yb5HsUreJ6UFAF1tEQw==}
     engines: {node: '>=18'}
 
-  cli-spinners@2.9.2:
-    resolution: {integrity: sha512-ywqV+5MmyL4E7ybXgKys4DugZbX0FC6LnwrhjuykIjnK9k8OQacQ7axGKnjDXWNhns0xot3bZI5h55H8yo9cJg==}
-    engines: {node: '>=6'}
-
   cli-spinners@3.3.0:
     resolution: {integrity: sha512-/+40ljC3ONVnYIttjMWrlL51nItDAbBrq2upN8BPyvGU/2n5Oxw3tbNwORCaNuNqLJnxGqOfjUuhsv7l5Q4IsQ==}
     engines: {node: '>=18.20'}
@@ -1432,10 +1356,6 @@ packages:
     resolution: {integrity: sha512-SroPvNHxUnk+vIW/dOSfNqdy1sPEFkrTk6TUtqLCnBlo3N7TNYYkzzN7uSD6+jVjrdO4+p8nH7JzH6cIvUem6A==}
     engines: {node: '>=20'}
 
-  code-excerpt@4.0.0:
-    resolution: {integrity: sha512-xxodCmBen3iy2i0WtAK8FlFNrRzjUqjRsMfho58xT/wvZU1YTM3fCnRjcy1gJPMepaRlgm/0e6w8SpWHpn3/cA==}
-    engines: {node: ^12.20.0 || ^14.13.1 || >=16.0.0}
-
   color-convert@2.0.1:
     resolution: {integrity: sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==}
     engines: {node: '>=7.0.0'}
@@ -1481,10 +1401,6 @@ packages:
     resolution: {integrity: sha512-nTjqfcBFEipKdXCv4YDQWCfmcLZKm81ldF0pAopTvyrFGVbcR6P/VAAd5G7N+0tTr8QqiU0tFadD6FK4NtJwOA==}
     engines: {node: '>= 0.6'}
 
-  convert-to-spaces@2.0.1:
-    resolution: {integrity: sha512-rcQ1bsQO9799wq24uE5AM2tAILy4gXGIK/njFWcVQkGNZ96edlpY+A7bjwvzjYvLDyzmG1MmMLZhpcsb+klNMQ==}
-    engines: {node: ^12.20.0 || ^14.13.1 || >=16.0.0}
-
   cookie-signature@1.2.2:
     resolution: {integrity: sha512-D76uU73ulSXrD1UXF4KE2TMxVVwhsnCgfAyTg9k8P6KGZjlXKrOLe4dJQKI3Bxi5wjesZoFXJWElNWBjPZMbhg==}
     engines: {node: '>=6.6.0'}
@@ -1544,17 +1460,10 @@ packages:
     resolution: {integrity: sha512-g7nH6P6dyDioJogAAGprGpCtVImJhpPk/roCzdb3fIh61/s/nPsfR6onyMwkCAR/OlC3yBC0lESvUoQEAssIrw==}
     engines: {node: '>= 0.8'}
 
-  dequal@2.0.3:
-    resolution: {integrity: sha512-0je+qPKHEMohvfRTCEo3CrPG6cAzAYgmzKyxRiYSSDkS6eGJdyVJm7WaYA5ECaAD9wLB2T4EEeymA5aFVcYXCA==}
-    engines: {node: '>=6'}
-
   detect-libc@2.1.2:
     resolution: {integrity: sha512-Btj2BOOO83o3WyH59e8MgXsxEQVcarkUOpEYrubB0urwnN10yQ364rsiByU11nZlqWYZm05i/of7io4mzihBtQ==}
     engines: {node: '>=8'}
 
-  dom-accessibility-api@0.5.16:
-    resolution: {integrity: sha512-X7BJ2yElsnOJ30pZF4uIIDfBEVgF4XEBxL9Bxhy6dnrm5hkzqmsWHGTiHqRiITNhMyFLyAiWndIJP7Z1NTteDg==}
-
   drizzle-kit@0.28.1:
     resolution: {integrity: sha512-JimOV+ystXTWMgZkLHYHf2w3oS28hxiH1FR0dkmJLc7GHzdGJoJAQtQS5DRppnabsRZwE2U1F6CuezVBgmsBBQ==}
     hasBin: true
@@ -1699,9 +1608,6 @@ packages:
     resolution: {integrity: sha512-j6vWzfrGVfyXxge+O0x5sh6cvxAog0a/4Rdd2K36zCMV5eJ+/+tOAngRO8cODMNWbVRdVlmGZQL2YS3yR8bIUA==}
     engines: {node: '>= 0.4'}
 
-  es-toolkit@1.42.0:
-    resolution: {integrity: sha512-SLHIyY7VfDJBM8clz4+T2oquwTQxEzu263AyhVK4jREOAwJ+8eebaa4wM3nlvnAqhDrMm2EsA6hWHaQsMPQ1nA==}
-
   esbuild-register@3.6.0:
     resolution: {integrity: sha512-H2/S7Pm8a9CL1uhp9OvjwrBh5Pvx0H8qVOxNu8Wed9Y7qv56MPtq+GGM8RJpq6glYJn9Wspr8uw7l55uyinNeg==}
     peerDependencies:
@@ -1730,10 +1636,6 @@ packages:
   escape-html@1.0.3:
     resolution: {integrity: sha512-NiSupZ4OeuGwr68lGIeym/ksIZMJodUGOSCZ/FSnTxcrekbvqrgdUxlJOMpijaKZVjAJrWrGs/6Jy8OMuyj9ow==}
 
-  escape-string-regexp@2.0.0:
-    resolution: {integrity: sha512-UpzcLCXolUWcNu5HtVMHYdXJjArjsF9C0aNnquZYY4uW/Vu0miy5YoWvbV345HauVvcAUnpRuhMMcqTcGOY2+w==}
-    engines: {node: '>=8'}
-
   estree-walker@3.0.3:
     resolution: {integrity: sha512-7RUKfXgSMMkzt6ZuXmqapOurLGPPfgj6l9uRZ7lRGolvk0y2yocc35LdcxKC5PQZdn2DMqioAQ2NoWcrTKmm6g==}
 
@@ -1909,45 +1811,12 @@ packages:
   ieee754@1.2.1:
     resolution: {integrity: sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==}
 
-  indent-string@5.0.0:
-    resolution: {integrity: sha512-m6FAo/spmsW2Ab2fU35JTYwtOKa2yAwXSwgjSv1TJzh4Mh7mC3lzAOVLBprb72XsTrgkEIsl7YrFNAiDiRhIGg==}
-    engines: {node: '>=12'}
-
   inherits@2.0.4:
     resolution: {integrity: sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==}
 
   ini@1.3.8:
     resolution: {integrity: sha512-JV/yugV2uzW5iMRSiZAyDtQd+nxtUnjeLt0acNdw98kKLrvuRVyB80tsREOE7yvGVgalhZ6RNXCmEHkUKBKxew==}
 
-  ink-spinner@5.0.0:
-    resolution: {integrity: sha512-EYEasbEjkqLGyPOUc8hBJZNuC5GvXGMLu0w5gdTNskPc7Izc5vO3tdQEYnzvshucyGCBXc86ig0ujXPMWaQCdA==}
-    engines: {node: '>=14.16'}
-    peerDependencies:
-      ink: '>=4.0.0'
-      react: '>=18.0.0'
-
-  ink-testing-library@4.0.0:
-    resolution: {integrity: sha512-yF92kj3pmBvk7oKbSq5vEALO//o7Z9Ck/OaLNlkzXNeYdwfpxMQkSowGTFUCS5MSu9bWfSZMewGpp7bFc66D7Q==}
-    engines: {node: '>=18'}
-    peerDependencies:
-      '@types/react': '>=18.0.0'
-    peerDependenciesMeta:
-      '@types/react':
-        optional: true
-
-  ink@6.5.1:
-    resolution: {integrity: sha512-wF3j/DmkM8q5E+OtfdQhCRw8/0ahkc8CUTgEddxZzpEWPslu7YPL3t64MWRoI9m6upVGpfAg4ms2BBvxCdKRLQ==}
-    engines: {node: '>=20'}
-    peerDependencies:
-      '@types/react': '>=19.0.0'
-      react: '>=19.0.0'
-      react-devtools-core: ^6.1.2
-    peerDependenciesMeta:
-      '@types/react':
-        optional: true
-      react-devtools-core:
-        optional: true
-
   ipaddr.js@1.9.1:
     resolution: {integrity: sha512-0KI/607xoxSToH7GjN1FfSbLoU0+btTicjsQSWQlh/hZykN8KpmMf7uYwPW3R+akZ6R/w18ZlXSHBYXiYUPO3g==}
     engines: {node: '>= 0.10'}
@@ -1956,11 +1825,6 @@ packages:
     resolution: {integrity: sha512-5XHYaSyiqADb4RnZ1Bdad6cPp8Toise4TzEjcOYDHZkTCbKgiUl7WTUCpNWHuxmDt91wnsZBc9xinNzopv3JMQ==}
     engines: {node: '>=18'}
 
-  is-in-ci@2.0.0:
-    resolution: {integrity: sha512-cFeerHriAnhrQSbpAxL37W1wcJKUUX07HyLWZCW1URJT/ra3GyUTzBgUnh24TMVfNTV2Hij2HLxkPHFZfOZy5w==}
-    engines: {node: '>=20'}
-    hasBin: true
-
   is-interactive@2.0.0:
     resolution: {integrity: sha512-qP1vozQRI+BMOPcjFzrjXuQvdak2pHNUMZoeG2eRbiSqyvbEf/wQtEOTOX1guk6E3t36RkaqiSt8A/6YElNxLQ==}
     engines: {node: '>=12'}
@@ -2008,9 +1872,6 @@ packages:
     resolution: {integrity: sha512-34wB/Y7MW7bzjKRjUKTa46I2Z7eV62Rkhva+KkopW7Qvv/OSWBqvkSY7vusOPrNuZcUG3tApvdVgNB8POj3SPw==}
     engines: {node: '>=10'}
 
-  js-tokens@4.0.0:
-    resolution: {integrity: sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==}
-
   js-tokens@9.0.1:
     resolution: {integrity: sha512-mxa9E9ITFOt0ban3j6L5MpjwegGz6lBQmM1IJkWeBZGcMxto50+eWdjC/52xDbS2vy0k7vIMK0Fe2wfL9OQSpQ==}
 
@@ -2030,6 +1891,10 @@ packages:
   json-schema-traverse@1.0.0:
     resolution: {integrity: sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==}
 
+  kleur@3.0.3:
+    resolution: {integrity: sha512-eTIzlVOSUR+JxdDFepEYcBMtZ9Qqdef+rnzWdRZuMbOywu5tO2w2N7rqjoANZ5k9vywhL6Br1VRjUIgTQx4E8w==}
+    engines: {node: '>=6'}
+
   lilconfig@3.1.3:
     resolution: {integrity: sha512-/vlFKAoH5Cgt3Ie+JLhRbwOsCQePABiU3tJ1egGvyQ+33R/vcwM2Zl2QR/LzjsBeItPt3oSVXapn+m4nQDvpzw==}
     engines: {node: '>=14'}
@@ -2068,10 +1933,6 @@ packages:
     resolution: {integrity: sha512-B5Y16Jr9LB9dHVkh6ZevG+vAbOsNOYCX+sXvFWFu7B3Iz5mijW3zdbMyhsh8ANd2mSWBYdJgnqi+mL7/LrOPYg==}
     engines: {node: 20 || >=22}
 
-  lz-string@1.5.0:
-    resolution: {integrity: sha512-h5bgJWpxJNswbU7qCrV0tIKQCaS3blPDrqKWx+QxzuzL1zGUzij9XCWLrSLsJPu5t+eWA/ycetzYAO5IOMcWAQ==}
-    hasBin: true
-
   magic-string@0.30.21:
     resolution: {integrity: sha512-vd2F4YUyEXKGcLHoq+TEyCjxueSeHnFxyyjNp80yg0XV4vUhnDer/lvvlqM/arB5bXQN5K2/3oinyCRyx8T2CQ==}
 
@@ -2117,10 +1978,6 @@ packages:
     resolution: {integrity: sha512-Lbgzdk0h4juoQ9fCKXW4by0UJqj+nOOrI9MJ1sSj4nI8aI2eo1qmvQEie4VD1glsS250n15LsWsYtCugiStS5A==}
     engines: {node: '>=18'}
 
-  mimic-fn@2.1.0:
-    resolution: {integrity: sha512-OqbOk5oEQeAZ8WXWydlu9HJjz9WVdEIvamMCcXmuqUYjTknH/sqsWvhQ3vgwKFRR1HpjvNBKQ37nbJgYzGqGcg==}
-    engines: {node: '>=6'}
-
   mimic-function@5.0.1:
     resolution: {integrity: sha512-VP79XUPxV2CigYP3jWwAUFSku2aKqBH7uTAapFWCBqutsbmDo96KY5o8uh6U+/YSIn5OxJnXp73beVkpqMIGhA==}
     engines: {node: '>=18'}
@@ -2191,10 +2048,6 @@ packages:
   once@1.4.0:
     resolution: {integrity: sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==}
 
-  onetime@5.1.2:
-    resolution: {integrity: sha512-kbpaSSGJTWdAY5KPVeMOKXSrPtr8C8C7wodJbcsd51jRnmD+GZu8Y0VoU6Dm5Z4vWr0Ig/1NKuWRKf7j5aaYSg==}
-    engines: {node: '>=6'}
-
   onetime@7.0.0:
     resolution: {integrity: sha512-VXJjc87FScF88uafS3JllDgvAm+c/Slfz06lorj2uAY34rlUu0Nt+v8wreiImcrgAjjIHp1rXpTDlLOGw29WwQ==}
     engines: {node: '>=18'}
@@ -2218,10 +2071,6 @@ packages:
     resolution: {integrity: sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ==}
     engines: {node: '>= 0.8'}
 
-  patch-console@2.0.0:
-    resolution: {integrity: sha512-0YNdUceMdaQwoKce1gatDScmMo5pu/tfABfnzEqeG0gtTmd7mh/WcwgUjtAeOU7N8nFFlbQBnFK2gXW5fGvmMA==}
-    engines: {node: ^12.20.0 || ^14.13.1 || >=16.0.0}
-
   path-key@3.1.1:
     resolution: {integrity: sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==}
     engines: {node: '>=8'}
@@ -2295,9 +2144,9 @@ packages:
     engines: {node: '>=10'}
     hasBin: true
 
-  pretty-format@27.5.1:
-    resolution: {integrity: sha512-Qb1gy5OrP5+zDf2Bvnzdl3jsTf1qXVMazbvCoKhtKqVs4/YK4ozX4gKQJJVyNe+cajNPn0KoC0MC3FUmaHWEmQ==}
-    engines: {node: ^10.13.0 || ^12.13.0 || ^14.15.0 || >=15.0.0}
+  prompts@2.4.2:
+    resolution: {integrity: sha512-NxNv/kLguCA7p3jE8oL2aEBsrJWgAakBpgmgK6lpPWV+WuOmY6r2/zbAVnP+T8bQlA0nzHXSJSJW0Hq7ylaD2Q==}
+    engines: {node: '>= 6'}
 
   proxy-addr@2.0.7:
     resolution: {integrity: sha512-llQsMLSUDUPT44jdrU/O37qlnifitDP+ZwrmmZcoSKyLKvtZxpyV0n2/bD/N4tBAAZ/gJEdZU7KMraoK1+XYAg==}
@@ -2329,20 +2178,6 @@ packages:
     resolution: {integrity: sha512-y3bGgqKj3QBdxLbLkomlohkvsA8gdAiUQlSBJnBhfn+BPxg4bc62d8TcBW15wavDfgexCgccckhcZvywyQYPOw==}
     hasBin: true
 
-  react-dom@19.2.1:
-    resolution: {integrity: sha512-ibrK8llX2a4eOskq1mXKu/TGZj9qzomO+sNfO98M6d9zIPOEhlBkMkBUBLd1vgS0gQsLDBzA+8jJBVXDnfHmJg==}
-    peerDependencies:
-      react: ^19.2.1
-
-  react-is@17.0.2:
-    resolution: {integrity: sha512-w2GsyukL62IJnlaff/nRegPQR94C/XXamvMWmSHRJ4y7Ts/4ocGRmTHvOs8PSE6pB3dWOrD/nueuU5sduBsQ4w==}
-
-  react-reconciler@0.33.0:
-    resolution: {integrity: sha512-KetWRytFv1epdpJc3J4G75I4WrplZE5jOL7Yq0p34+OVOKF4Se7WrdIdVC45XsSSmUTlht2FM/fM1FZb1mfQeA==}
-    engines: {node: '>=0.10.0'}
-    peerDependencies:
-      react: ^19.2.0
-
   react@19.2.1:
     resolution: {integrity: sha512-DGrYcCWK7tvYMnWh79yrPHt+vdx9tY+1gPZa7nJQtO/p8bLTDaHp4dzwEhQB7pZ4Xe3ok4XKuEPrVuc+wlpkmw==}
     engines: {node: '>=0.10.0'}
@@ -2373,10 +2208,6 @@ packages:
   resolve-pkg-maps@1.0.0:
     resolution: {integrity: sha512-seS2Tj26TBVOC2NIc2rOe2y2ZO7efxITtLZcGSOnHHNOQ7CkiUBfw0Iw2ck6xkIhPwLhKNLS8BO+hEpngQlqzw==}
 
-  restore-cursor@4.0.0:
-    resolution: {integrity: sha512-I9fPXU9geO9bHOt9pHHOhOkYerIMsmVaWB0rA2AI9ERh/+x/i7MV5HKBNrg+ljO5eoPVgCcnFuRjJ9uH6I/3eg==}
-    engines: {node: ^12.20.0 || ^14.13.1 || >=16.0.0}
-
   restore-cursor@5.1.0:
     resolution: {integrity: sha512-oMA2dcrw6u0YfxJQXm342bFKX/E4sG9rbTzO9ptUcR/e8A33cHuvStiYOwH7fszkZlZ1z/ta9AAoPk2F4qIOHA==}
     engines: {node: '>=18'}
@@ -2403,9 +2234,6 @@ packages:
     resolution: {integrity: sha512-xAg7SOnEhrm5zI3puOOKyy1OMcMlIJZYNJY7xLBwSze0UjhPLnWfj2GF2EpT0jmzaJKIWKHLsaSSajf35bcYnA==}
     engines: {node: '>=v12.22.7'}
 
-  scheduler@0.27.0:
-    resolution: {integrity: sha512-eNv+WrVbKu1f3vbYJT/xtiF5syA5HPIMtf9IgY/nKg0sWqzAUEvqY/xm7OcZc/qafLx/iO9FgOmeSAp4v5ti/Q==}
-
   semver@7.7.3:
     resolution: {integrity: sha512-SdsKMrI9TdgjdweUSR9MweHA4EJ8YxHn8DFaDisvhVlUOe4BF1tLD7GAj0lIqWVl+dPb/rExr0Btby5loQm20Q==}
     engines: {node: '>=10'}
@@ -2449,9 +2277,6 @@ packages:
   siginfo@2.0.0:
     resolution: {integrity: sha512-ybx0WO1/8bSBLEWXZvEd7gMW3Sn3JFlW3TvX1nREbDLRNQNaeNN8WK0meBwPdAaOI7TtRRRJn/Es1zhrrCHu7g==}
 
-  signal-exit@3.0.7:
-    resolution: {integrity: sha512-wnD2ZE+l+SPC/uoS0vXeE9L1+0wuaMqKlfz9AMUo38JsyLSBWSFcHR1Rri62LZc12vLr1gb3jl7iwQhgwpAbGQ==}
-
   signal-exit@4.1.0:
     resolution: {integrity: sha512-bzyZ1e88w9O1iNJbKnOlvYTrWPDl46O1bG0D3XInv+9tkPrxrN8jUUTiFlDkkmKWgn1M6CfIA13SuGqOa9Korw==}
     engines: {node: '>=14'}
@@ -2462,6 +2287,9 @@ packages:
   simple-get@4.0.1:
     resolution: {integrity: sha512-brv7p5WgH0jmQJr1ZDDfKDOSeWWg+OVypG99A/5vYGPqJ6pxiaHLy8nxtFjBA7oMa01ebA9gfh1uMCFqOuXxvA==}
 
+  sisteransi@1.0.5:
+    resolution: {integrity: sha512-bLGGlR1QxBcynn2d5YmDX4MGjlZvy2MRBDRNHLJ8VI6l6+9FUiyTFNJ0IveOSP0bcXgVDPRcfGqA0pjaqUpfVg==}
+
   slice-ansi@7.1.2:
     resolution: {integrity: sha512-iOBWFgUX7caIZiuutICxVgX1SdxwAVFFKwt1EvMYYec/NWO5meOJ6K5uQxhrYBdQJne4KxiqZc+KptFOWFSI9w==}
     engines: {node: '>=18'}
@@ -2481,10 +2309,6 @@ packages:
     resolution: {integrity: sha512-i5uvt8C3ikiWeNZSVZNWcfZPItFQOsYTUAOkcUPGd8DqDy1uOUikjt5dG+uRlwyvR108Fb9DOd4GvXfT0N2/uQ==}
     engines: {node: '>= 12'}
 
-  stack-utils@2.0.6:
-    resolution: {integrity: sha512-XlkWvfIm6RmsWtNJx+uqtKLS8eqFbxUg0ZzLXqY0caEy9l7hruX8IpiDnjsLavoBgqCCR71TqWO8MaXYheJ3RQ==}
-    engines: {node: '>=10'}
-
   stackback@0.0.2:
     resolution: {integrity: sha512-1XMJE5fQo1jGH6Y/7ebnwPOBEkIEnT4QF32d5R1+VXdXveM0IBMJt8zfaxX1P3QhVwrYe+576+jkANtSS2mBbw==}
 
@@ -2644,10 +2468,6 @@ packages:
   tunnel-agent@0.6.0:
     resolution: {integrity: sha512-McnNiV1l8RYeY8tBgEpuodCC1mLUdbSN+CYBL7kJsJNInOP8UjDDEwdk6Mw60vdLLrr5NHKZhMAOSrR2NZuQ+w==}
 
-  type-fest@4.41.0:
-    resolution: {integrity: sha512-TeTSQ6H5YHvpqVwBRcnLDCBnDOHWYu7IvGbHT6N8AOymcr9PJGjc1GTtiWZTYg0NCgYwvnYWEkVChQAr9bjfwA==}
-    engines: {node: '>=16'}
-
   type-is@2.0.1:
     resolution: {integrity: sha512-OZs6gsjF4vMp32qrCbiVSkrFmXtG/AZhY3t0iAMrMBiAZyV9oALtXO8hsrHbMXF9x6L3grlFuwW2oAz7cav+Gw==}
     engines: {node: '>= 0.6'}
@@ -2806,10 +2626,6 @@ packages:
     engines: {node: '>=8'}
     hasBin: true
 
-  widest-line@5.0.0:
-    resolution: {integrity: sha512-c9bZp7b5YtRj2wOe6dlj32MK+Bx/M/d+9VB2SHM1OtsUHR0aV0tdP6DWh/iMt0kWi1t5g1Iudu6hQRNd1A4PVA==}
-    engines: {node: '>=18'}
-
   wordwrapjs@5.1.1:
     resolution: {integrity: sha512-0yweIbkINJodk27gX9LBGMzyQdBDan3s/dEAiwBOj+Mf0PPyWL6/rikalkv8EeD0E8jm4o5RXEOrFTP3NXbhJg==}
     engines: {node: '>=12.17'}
@@ -2849,9 +2665,6 @@ packages:
     resolution: {integrity: sha512-CzhO+pFNo8ajLM2d2IW/R93ipy99LWjtwblvC1RsoSUMZgyLbYFr221TnSNT7GjGdYui6P459mw9JH/g/zW2ug==}
     engines: {node: '>=18'}
 
-  yoga-layout@3.2.1:
-    resolution: {integrity: sha512-0LPOt3AxKqMdFBZA3HBAt/t/8vIKq7VaQYbuA8WxCgung+p9TVyKRYdpvCb80HcdTN2NkbIKbhNwKUfm3tQywQ==}
-
   zlibjs@0.3.1:
     resolution: {integrity: sha512-+J9RrgTKOmlxFSDHo0pI1xM6BLVUv+o0ZT9ANtCxGkjIVCCUdx9alUF8Gm+dGLKbkkkidWIHFDZHDMpfITt4+w==}
 
@@ -2870,12 +2683,8 @@ snapshots:
       lodash: 4.17.21
       typical: 7.3.0
 
-  '@acemir/cssom@0.9.28': {}
-
-  '@alcalzone/ansi-tokenize@0.2.2':
-    dependencies:
-      ansi-styles: 6.2.3
-      is-fullwidth-code-point: 5.1.0
+  '@acemir/cssom@0.9.28':
+    optional: true
 
   '@apache-arrow/ts@14.0.2':
     dependencies:
@@ -2897,6 +2706,7 @@ snapshots:
       '@csstools/css-parser-algorithms': 3.0.5(@csstools/css-tokenizer@3.0.4)
       '@csstools/css-tokenizer': 3.0.4
       lru-cache: 11.2.4
+    optional: true
 
   '@asamuzakjp/dom-selector@6.7.6':
     dependencies:
@@ -2905,14 +2715,10 @@ snapshots:
       css-tree: 3.1.0
       is-potential-custom-element-name: 1.0.1
       lru-cache: 11.2.4
+    optional: true
 
-  '@asamuzakjp/nwsapi@2.3.9': {}
-
-  '@babel/code-frame@7.27.1':
-    dependencies:
-      '@babel/helper-validator-identifier': 7.28.5
-      js-tokens: 4.0.0
-      picocolors: 1.1.1
+  '@asamuzakjp/nwsapi@2.3.9':
+    optional: true
 
   '@babel/helper-string-parser@7.27.1': {}
 
@@ -2922,8 +2728,6 @@ snapshots:
     dependencies:
       '@babel/types': 7.28.5
 
-  '@babel/runtime@7.28.4': {}
-
   '@babel/types@7.28.5':
     dependencies:
       '@babel/helper-string-parser': 7.27.1
@@ -2966,12 +2770,14 @@ snapshots:
   '@biomejs/cli-win32-x64@2.3.8':
     optional: true
 
-  '@csstools/color-helpers@5.1.0': {}
+  '@csstools/color-helpers@5.1.0':
+    optional: true
 
   '@csstools/css-calc@2.1.4(@csstools/css-parser-algorithms@3.0.5(@csstools/css-tokenizer@3.0.4))(@csstools/css-tokenizer@3.0.4)':
     dependencies:
       '@csstools/css-parser-algorithms': 3.0.5(@csstools/css-tokenizer@3.0.4)
       '@csstools/css-tokenizer': 3.0.4
+    optional: true
 
   '@csstools/css-color-parser@3.1.0(@csstools/css-parser-algorithms@3.0.5(@csstools/css-tokenizer@3.0.4))(@csstools/css-tokenizer@3.0.4)':
     dependencies:
@@ -2979,14 +2785,18 @@ snapshots:
       '@csstools/css-calc': 2.1.4(@csstools/css-parser-algorithms@3.0.5(@csstools/css-tokenizer@3.0.4))(@csstools/css-tokenizer@3.0.4)
       '@csstools/css-parser-algorithms': 3.0.5(@csstools/css-tokenizer@3.0.4)
       '@csstools/css-tokenizer': 3.0.4
+    optional: true
 
   '@csstools/css-parser-algorithms@3.0.5(@csstools/css-tokenizer@3.0.4)':
     dependencies:
       '@csstools/css-tokenizer': 3.0.4
+    optional: true
 
-  '@csstools/css-syntax-patches-for-csstree@1.0.20': {}
+  '@csstools/css-syntax-patches-for-csstree@1.0.20':
+    optional: true
 
-  '@csstools/css-tokenizer@3.0.4': {}
+  '@csstools/css-tokenizer@3.0.4':
+    optional: true
 
   '@drizzle-team/brocli@0.10.2': {}
 
@@ -3457,30 +3267,8 @@ snapshots:
 
   '@standard-schema/spec@1.0.0': {}
 
-  '@testing-library/dom@10.4.1':
-    dependencies:
-      '@babel/code-frame': 7.27.1
-      '@babel/runtime': 7.28.4
-      '@types/aria-query': 5.0.4
-      aria-query: 5.3.0
-      dom-accessibility-api: 0.5.16
-      lz-string: 1.5.0
-      picocolors: 1.1.1
-      pretty-format: 27.5.1
-
-  '@testing-library/react@16.3.0(@testing-library/dom@10.4.1)(@types/react@19.2.7)(react-dom@19.2.1(react@19.2.1))(react@19.2.1)':
-    dependencies:
-      '@babel/runtime': 7.28.4
-      '@testing-library/dom': 10.4.1
-      react: 19.2.1
-      react-dom: 19.2.1(react@19.2.1)
-    optionalDependencies:
-      '@types/react': 19.2.7
-
   '@tsconfig/node-lts@24.0.0': {}
 
-  '@types/aria-query@5.0.4': {}
-
   '@types/better-sqlite3@7.6.13':
     dependencies:
       '@types/node': 22.19.1
@@ -3510,9 +3298,15 @@ snapshots:
 
   '@types/pad-left@2.1.1': {}
 
+  '@types/prompts@2.4.9':
+    dependencies:
+      '@types/node': 24.10.1
+      kleur: 3.0.3
+
   '@types/react@19.2.7':
     dependencies:
       csstype: 3.2.3
+    optional: true
 
   '@vitest/coverage-v8@4.0.15(vitest@4.0.15(@types/node@24.10.1)(jsdom@27.2.0)(tsx@4.21.0)(yaml@2.8.2))':
     dependencies:
@@ -3577,7 +3371,8 @@ snapshots:
 
   acorn@8.15.0: {}
 
-  agent-base@7.1.4: {}
+  agent-base@7.1.4:
+    optional: true
 
   ajv-formats@3.0.1(ajv@8.17.1):
     optionalDependencies:
@@ -3594,16 +3389,12 @@ snapshots:
     dependencies:
       environment: 1.1.0
 
-  ansi-regex@5.0.1: {}
-
   ansi-regex@6.2.2: {}
 
   ansi-styles@4.3.0:
     dependencies:
       color-convert: 2.0.1
 
-  ansi-styles@5.2.0: {}
-
   ansi-styles@6.2.3: {}
 
   any-promise@1.3.0: {}
@@ -3621,10 +3412,6 @@ snapshots:
       pad-left: 2.1.0
       tslib: 2.8.1
 
-  aria-query@5.3.0:
-    dependencies:
-      dequal: 2.0.3
-
   array-back@3.1.0: {}
 
   array-back@6.2.2: {}
@@ -3639,8 +3426,6 @@ snapshots:
 
   asynckit@0.4.0: {}
 
-  auto-bind@5.0.1: {}
-
   axios@1.13.2:
     dependencies:
       follow-redirects: 1.15.11
@@ -3659,6 +3444,7 @@ snapshots:
   bidi-js@1.0.3:
     dependencies:
       require-from-string: 2.0.2
+    optional: true
 
   bindings@1.5.0:
     dependencies:
@@ -3735,18 +3521,10 @@ snapshots:
 
   chownr@1.1.4: {}
 
-  cli-boxes@3.0.0: {}
-
-  cli-cursor@4.0.0:
-    dependencies:
-      restore-cursor: 4.0.0
-
   cli-cursor@5.0.0:
     dependencies:
       restore-cursor: 5.1.0
 
-  cli-spinners@2.9.2: {}
-
   cli-spinners@3.3.0: {}
 
   cli-truncate@5.1.1:
@@ -3754,10 +3532,6 @@ snapshots:
       slice-ansi: 7.1.2
       string-width: 8.1.0
 
-  code-excerpt@4.0.0:
-    dependencies:
-      convert-to-spaces: 2.0.1
-
   color-convert@2.0.1:
     dependencies:
       color-name: 1.1.4
@@ -3796,8 +3570,6 @@ snapshots:
 
   content-type@1.0.5: {}
 
-  convert-to-spaces@2.0.1: {}
-
   cookie-signature@1.2.2: {}
 
   cookie@0.7.2: {}
@@ -3817,25 +3589,30 @@ snapshots:
     dependencies:
       mdn-data: 2.12.2
       source-map-js: 1.2.1
+    optional: true
 
   cssstyle@5.3.3:
     dependencies:
       '@asamuzakjp/css-color': 4.1.0
       '@csstools/css-syntax-patches-for-csstree': 1.0.20
       css-tree: 3.1.0
+    optional: true
 
-  csstype@3.2.3: {}
+  csstype@3.2.3:
+    optional: true
 
   data-urls@6.0.0:
     dependencies:
       whatwg-mimetype: 4.0.0
       whatwg-url: 15.1.0
+    optional: true
 
   debug@4.4.3:
     dependencies:
       ms: 2.1.3
 
-  decimal.js@10.6.0: {}
+  decimal.js@10.6.0:
+    optional: true
 
   decompress-response@6.0.0:
     dependencies:
@@ -3847,12 +3624,8 @@ snapshots:
 
   depd@2.0.0: {}
 
-  dequal@2.0.3: {}
-
   detect-libc@2.1.2: {}
 
-  dom-accessibility-api@0.5.16: {}
-
   drizzle-kit@0.28.1:
     dependencies:
       '@drizzle-team/brocli': 0.10.2
@@ -3885,7 +3658,8 @@ snapshots:
     dependencies:
       once: 1.4.0
 
-  entities@6.0.1: {}
+  entities@6.0.1:
+    optional: true
 
   env-paths@3.0.0: {}
 
@@ -3908,8 +3682,6 @@ snapshots:
       has-tostringtag: 1.0.2
       hasown: 2.0.2
 
-  es-toolkit@1.42.0: {}
-
   esbuild-register@3.6.0(esbuild@0.19.12):
     dependencies:
       debug: 4.4.3
@@ -4028,8 +3800,6 @@ snapshots:
 
   escape-html@1.0.3: {}
 
-  escape-string-regexp@2.0.0: {}
-
   estree-walker@3.0.3:
     dependencies:
       '@types/estree': 1.0.8
@@ -4186,6 +3956,7 @@ snapshots:
   html-encoding-sniffer@4.0.0:
     dependencies:
       whatwg-encoding: 3.1.1
+    optional: true
 
   html-escaper@2.0.2: {}
 
@@ -4203,6 +3974,7 @@ snapshots:
       debug: 4.4.3
     transitivePeerDependencies:
       - supports-color
+    optional: true
 
   https-proxy-agent@7.0.6:
     dependencies:
@@ -4210,10 +3982,12 @@ snapshots:
       debug: 4.4.3
     transitivePeerDependencies:
       - supports-color
+    optional: true
 
   iconv-lite@0.6.3:
     dependencies:
       safer-buffer: 2.1.2
+    optional: true
 
   iconv-lite@0.7.0:
     dependencies:
@@ -4223,67 +3997,22 @@ snapshots:
 
   ieee754@1.2.1: {}
 
-  indent-string@5.0.0: {}
-
   inherits@2.0.4: {}
 
   ini@1.3.8: {}
 
-  ink-spinner@5.0.0(ink@6.5.1(@types/react@19.2.7)(react@19.2.1))(react@19.2.1):
-    dependencies:
-      cli-spinners: 2.9.2
-      ink: 6.5.1(@types/react@19.2.7)(react@19.2.1)
-      react: 19.2.1
-
-  ink-testing-library@4.0.0(@types/react@19.2.7):
-    optionalDependencies:
-      '@types/react': 19.2.7
-
-  ink@6.5.1(@types/react@19.2.7)(react@19.2.1):
-    dependencies:
-      '@alcalzone/ansi-tokenize': 0.2.2
-      ansi-escapes: 7.2.0
-      ansi-styles: 6.2.3
-      auto-bind: 5.0.1
-      chalk: 5.6.2
-      cli-boxes: 3.0.0
-      cli-cursor: 4.0.0
-      cli-truncate: 5.1.1
-      code-excerpt: 4.0.0
-      es-toolkit: 1.42.0
-      indent-string: 5.0.0
-      is-in-ci: 2.0.0
-      patch-console: 2.0.0
-      react: 19.2.1
-      react-reconciler: 0.33.0(react@19.2.1)
-      signal-exit: 3.0.7
-      slice-ansi: 7.1.2
-      stack-utils: 2.0.6
-      string-width: 8.1.0
-      type-fest: 4.41.0
-      widest-line: 5.0.0
-      wrap-ansi: 9.0.2
-      ws: 8.18.3
-      yoga-layout: 3.2.1
-    optionalDependencies:
-      '@types/react': 19.2.7
-    transitivePeerDependencies:
-      - bufferutil
-      - utf-8-validate
-
   ipaddr.js@1.9.1: {}
 
   is-fullwidth-code-point@5.1.0:
     dependencies:
       get-east-asian-width: 1.4.0
 
-  is-in-ci@2.0.0: {}
-
   is-interactive@2.0.0: {}
 
   is-number@7.0.0: {}
 
-  is-potential-custom-element-name@1.0.1: {}
+  is-potential-custom-element-name@1.0.1:
+    optional: true
 
   is-promise@4.0.0: {}
 
@@ -4318,8 +4047,6 @@ snapshots:
 
   joycon@3.1.1: {}
 
-  js-tokens@4.0.0: {}
-
   js-tokens@9.0.1: {}
 
   jsdom@27.2.0:
@@ -4348,11 +4075,14 @@ snapshots:
       - bufferutil
       - supports-color
       - utf-8-validate
+    optional: true
 
   json-bignum@0.0.3: {}
 
   json-schema-traverse@1.0.0: {}
 
+  kleur@3.0.3: {}
+
   lilconfig@3.1.3: {}
 
   lines-and-columns@1.2.4: {}
@@ -4395,9 +4125,8 @@ snapshots:
       strip-ansi: 7.1.2
       wrap-ansi: 9.0.2
 
-  lru-cache@11.2.4: {}
-
-  lz-string@1.5.0: {}
+  lru-cache@11.2.4:
+    optional: true
 
   magic-string@0.30.21:
     dependencies:
@@ -4415,7 +4144,8 @@ snapshots:
 
   math-intrinsics@1.1.0: {}
 
-  mdn-data@2.12.2: {}
+  mdn-data@2.12.2:
+    optional: true
 
   media-typer@1.1.0: {}
 
@@ -4438,8 +4168,6 @@ snapshots:
     dependencies:
       mime-db: 1.54.0
 
-  mimic-fn@2.1.0: {}
-
   mimic-function@5.0.1: {}
 
   mimic-response@3.1.0: {}
@@ -4493,10 +4221,6 @@ snapshots:
     dependencies:
       wrappy: 1.0.2
 
-  onetime@5.1.2:
-    dependencies:
-      mimic-fn: 2.1.0
-
   onetime@7.0.0:
     dependencies:
       mimic-function: 5.0.1
@@ -4522,11 +4246,10 @@ snapshots:
   parse5@8.0.0:
     dependencies:
       entities: 6.0.1
+    optional: true
 
   parseurl@1.3.3: {}
 
-  patch-console@2.0.0: {}
-
   path-key@3.1.1: {}
 
   path-to-regexp@8.3.0: {}
@@ -4588,11 +4311,10 @@ snapshots:
       tar-fs: 2.1.4
       tunnel-agent: 0.6.0
 
-  pretty-format@27.5.1:
+  prompts@2.4.2:
     dependencies:
-      ansi-regex: 5.0.1
-      ansi-styles: 5.2.0
-      react-is: 17.0.2
+      kleur: 3.0.3
+      sisteransi: 1.0.5
 
   proxy-addr@2.0.7:
     dependencies:
@@ -4606,7 +4328,8 @@ snapshots:
       end-of-stream: 1.4.5
       once: 1.4.0
 
-  punycode@2.3.1: {}
+  punycode@2.3.1:
+    optional: true
 
   qs@6.14.0:
     dependencies:
@@ -4628,19 +4351,8 @@ snapshots:
       minimist: 1.2.8
       strip-json-comments: 2.0.1
 
-  react-dom@19.2.1(react@19.2.1):
-    dependencies:
-      react: 19.2.1
-      scheduler: 0.27.0
-
-  react-is@17.0.2: {}
-
-  react-reconciler@0.33.0(react@19.2.1):
-    dependencies:
-      react: 19.2.1
-      scheduler: 0.27.0
-
-  react@19.2.1: {}
+  react@19.2.1:
+    optional: true
 
   readable-stream@3.6.2:
     dependencies:
@@ -4660,11 +4372,6 @@ snapshots:
 
   resolve-pkg-maps@1.0.0: {}
 
-  restore-cursor@4.0.0:
-    dependencies:
-      onetime: 5.1.2
-      signal-exit: 3.0.7
-
   restore-cursor@5.1.0:
     dependencies:
       onetime: 7.0.0
@@ -4717,8 +4424,7 @@ snapshots:
   saxes@6.0.0:
     dependencies:
       xmlchars: 2.2.0
-
-  scheduler@0.27.0: {}
+    optional: true
 
   semver@7.7.3: {}
 
@@ -4785,8 +4491,6 @@ snapshots:
 
   siginfo@2.0.0: {}
 
-  signal-exit@3.0.7: {}
-
   signal-exit@4.1.0: {}
 
   simple-concat@1.0.1: {}
@@ -4797,6 +4501,8 @@ snapshots:
       once: 1.4.0
       simple-concat: 1.0.1
 
+  sisteransi@1.0.5: {}
+
   slice-ansi@7.1.2:
     dependencies:
       ansi-styles: 6.2.3
@@ -4813,10 +4519,6 @@ snapshots:
 
   source-map@0.7.6: {}
 
-  stack-utils@2.0.6:
-    dependencies:
-      escape-string-regexp: 2.0.0
-
   stackback@0.0.2: {}
 
   statuses@2.0.2: {}
@@ -4864,7 +4566,8 @@ snapshots:
     dependencies:
       has-flag: 4.0.0
 
-  symbol-tree@3.2.4: {}
+  symbol-tree@3.2.4:
+    optional: true
 
   table-layout@3.0.2:
     dependencies:
@@ -4928,11 +4631,13 @@ snapshots:
 
   tinyrainbow@3.0.3: {}
 
-  tldts-core@7.0.19: {}
+  tldts-core@7.0.19:
+    optional: true
 
   tldts@7.0.19:
     dependencies:
       tldts-core: 7.0.19
+    optional: true
 
   to-regex-range@5.0.1:
     dependencies:
@@ -4943,12 +4648,14 @@ snapshots:
   tough-cookie@6.0.0:
     dependencies:
       tldts: 7.0.19
+    optional: true
 
   tr46@0.0.3: {}
 
   tr46@6.0.0:
     dependencies:
       punycode: 2.3.1
+    optional: true
 
   tree-kill@1.2.2: {}
 
@@ -4995,8 +4702,6 @@ snapshots:
     dependencies:
       safe-buffer: 5.2.1
 
-  type-fest@4.41.0: {}
-
   type-is@2.0.1:
     dependencies:
       content-type: 1.0.5
@@ -5091,23 +4796,28 @@ snapshots:
   w3c-xmlserializer@5.0.0:
     dependencies:
       xml-name-validator: 5.0.0
+    optional: true
 
   wasm-feature-detect@1.8.0: {}
 
   webidl-conversions@3.0.1: {}
 
-  webidl-conversions@8.0.0: {}
+  webidl-conversions@8.0.0:
+    optional: true
 
   whatwg-encoding@3.1.1:
     dependencies:
       iconv-lite: 0.6.3
+    optional: true
 
-  whatwg-mimetype@4.0.0: {}
+  whatwg-mimetype@4.0.0:
+    optional: true
 
   whatwg-url@15.1.0:
     dependencies:
       tr46: 6.0.0
       webidl-conversions: 8.0.0
+    optional: true
 
   whatwg-url@5.0.0:
     dependencies:
@@ -5123,10 +4833,6 @@ snapshots:
       siginfo: 2.0.0
       stackback: 0.0.2
 
-  widest-line@5.0.0:
-    dependencies:
-      string-width: 7.2.0
-
   wordwrapjs@5.1.1: {}
 
   wrap-ansi@9.0.2:
@@ -5137,18 +4843,19 @@ snapshots:
 
   wrappy@1.0.2: {}
 
-  ws@8.18.3: {}
+  ws@8.18.3:
+    optional: true
 
-  xml-name-validator@5.0.0: {}
+  xml-name-validator@5.0.0:
+    optional: true
 
-  xmlchars@2.2.0: {}
+  xmlchars@2.2.0:
+    optional: true
 
   yaml@2.8.2: {}
 
   yoctocolors@2.1.2: {}
 
-  yoga-layout@3.2.1: {}
-
   zlibjs@0.3.1: {}
 
   zod-to-json-schema@3.25.0(zod@3.25.76):
diff --git a/vitest.config.ts b/vitest.config.ts
index 7e5b849..300f4d0 100644
--- a/vitest.config.ts
+++ b/vitest.config.ts
@@ -15,11 +15,8 @@ export default defineConfig({
         '**/*.d.ts',
         '**/test/**',
         // Barrel exports (re-export only, no logic)
-        '**/contexts/index.ts',
-        '**/hooks/index.ts',
-        '**/services/index.ts',
-        '**/components/index.ts',
         '**/providers/index.ts',
+        '**/commands/index.ts',
         'packages/extract/src/index.ts',
       ],
     },

From 8cc0372bcce2c17051264ef6bf766f678a95783c Mon Sep 17 00:00:00 2001
From: prosdev <pros.seng@contentstack.com>
Date: Mon, 8 Dec 2025 10:41:24 -0800
Subject: [PATCH 5/5] test(extract): update schema tests for new field mappings

- Test vendor field alternatives (store_name, merchant, business_name)
- Test amount field alternatives (total, total_amount)
- Test date normalization and dateRaw preservation
---
 packages/cli/src/commands/extract.ts          | 24 +++----
 .../extract/src/__tests__/extract.test.ts     |  8 +--
 packages/extract/src/__tests__/mime.test.ts   |  1 -
 packages/extract/src/__tests__/ocr.test.ts    | 10 ++-
 packages/extract/src/__tests__/pdf.test.ts    | 33 ++++++----
 .../extract/src/__tests__/schemas.test.ts     | 66 +++++++++++++++----
 packages/extract/src/index.ts                 | 15 +++--
 packages/extract/src/mime.ts                  |  1 -
 packages/extract/src/providers/gemini.ts      |  1 -
 packages/extract/src/providers/index.ts       |  1 -
 packages/extract/src/providers/ollama.ts      |  8 ++-
 packages/extract/src/schemas.ts               |  7 +-
 packages/extract/src/types.ts                 |  1 -
 13 files changed, 110 insertions(+), 66 deletions(-)

diff --git a/packages/cli/src/commands/extract.ts b/packages/cli/src/commands/extract.ts
index 748caf2..ffaa8bd 100644
--- a/packages/cli/src/commands/extract.ts
+++ b/packages/cli/src/commands/extract.ts
@@ -1,5 +1,5 @@
 import { resolve } from 'node:path';
-import type { Config, DocumentData } from '@doc-agent/core';
+import type { Config } from '@doc-agent/core';
 import { extractDocument, type StreamChunk } from '@doc-agent/extract';
 import { storage } from '@doc-agent/storage';
 import kero from '@lytics/kero';
@@ -11,14 +11,15 @@ import {
   checkOllamaInstalled,
   checkOllamaRunning,
   installOllama,
+  type PullProgress,
   pullModel,
   startOllama,
   waitForOllama,
-  type PullProgress,
 } from '../services/ollama';
 
 const logger = kero.createLogger({
-  level: (process.env.LOG_LEVEL as 'trace' | 'debug' | 'info' | 'warn' | 'error' | 'fatal') || 'info',
+  level:
+    (process.env.LOG_LEVEL as 'trace' | 'debug' | 'info' | 'warn' | 'error' | 'fatal') || 'info',
 });
 
 export interface ExtractOptions {
@@ -182,10 +183,10 @@ export async function runExtract(file: string, options: ExtractOptions): Promise
     const result = await extractDocument(absolutePath, config, {
       onStream: (chunk: StreamChunk) => {
         if (!chunk) return;
-        
+
         if (chunk.type === 'log') {
           // Log via kero - use simple string logging to avoid issues
-          const msg = `${chunk.message}${chunk.data ? ' ' + JSON.stringify(chunk.data) : ''}`;
+          const msg = `${chunk.message}${chunk.data ? ` ${JSON.stringify(chunk.data)}` : ''}`;
           if (chunk.level === 'error') {
             logger.error(msg);
           } else if (chunk.level === 'warn') {
@@ -195,7 +196,7 @@ export async function runExtract(file: string, options: ExtractOptions): Promise
           } else {
             logger.info(msg);
           }
-          
+
           // Update spinner for info logs
           if (chunk.level === 'info') {
             spinner.text = chunk.message;
@@ -219,7 +220,10 @@ export async function runExtract(file: string, options: ExtractOptions): Promise
     });
 
     spinner.succeed('Extraction complete');
-    logger.info({ type: result.type, itemCount: result.items?.length ?? 0 }, 'Extraction successful');
+    logger.info(
+      { type: result.type, itemCount: result.items?.length ?? 0 },
+      'Extraction successful'
+    );
 
     // Save to database (unless dry run)
     if (!dryRun) {
@@ -240,19 +244,17 @@ export async function runExtract(file: string, options: ExtractOptions): Promise
     // Print result
     console.log(chalk.gray('─'.repeat(40)));
     console.log(JSON.stringify(result, null, 2));
-
   } catch (error) {
     spinner.fail('Extraction failed');
     logger.error({ error: String(error) }, 'Extraction failed');
-    
+
     // Show the prompt for debugging if available
     if (lastPrompt) {
       console.log(chalk.gray('\n─── Last Prompt ───'));
       console.log(chalk.gray(lastPrompt.slice(-500))); // Last 500 chars
     }
-    
+
     console.error(chalk.red(String(error)));
     process.exitCode = 1;
   }
 }
-
diff --git a/packages/extract/src/__tests__/extract.test.ts b/packages/extract/src/__tests__/extract.test.ts
index 9b6d956..c9bdc92 100644
--- a/packages/extract/src/__tests__/extract.test.ts
+++ b/packages/extract/src/__tests__/extract.test.ts
@@ -25,9 +25,9 @@ vi.mock('@lytics/kero', () => ({
 }));
 
 import type { Config } from '@doc-agent/core';
+import { extractDocument } from '../extract';
 import { extractWithGemini } from '../providers/gemini';
 import { extractWithOllama } from '../providers/ollama';
-import { extractDocument } from '../extract';
 
 describe('extractDocument', () => {
   beforeEach(() => {
@@ -127,10 +127,6 @@ describe('extractDocument', () => {
 
     // base64 of 'fake-file-content'
     const expectedBase64 = Buffer.from('fake-file-content').toString('base64');
-    expect(extractWithGemini).toHaveBeenCalledWith(
-      '/path/to/test.pdf',
-      expectedBase64,
-      config
-    );
+    expect(extractWithGemini).toHaveBeenCalledWith('/path/to/test.pdf', expectedBase64, config);
   });
 });
diff --git a/packages/extract/src/__tests__/mime.test.ts b/packages/extract/src/__tests__/mime.test.ts
index a2c1a2c..94f182c 100644
--- a/packages/extract/src/__tests__/mime.test.ts
+++ b/packages/extract/src/__tests__/mime.test.ts
@@ -37,4 +37,3 @@ describe('getMimeType', () => {
     expect(getMimeType('image.backup.png')).toBe('image/png');
   });
 });
-
diff --git a/packages/extract/src/__tests__/ocr.test.ts b/packages/extract/src/__tests__/ocr.test.ts
index ce29abd..1bd11c3 100644
--- a/packages/extract/src/__tests__/ocr.test.ts
+++ b/packages/extract/src/__tests__/ocr.test.ts
@@ -50,11 +50,7 @@ describe('ocrImages', () => {
       .mockResolvedValueOnce({ data: { text: 'Page 2 content' } } as Tesseract.RecognizeResult)
       .mockResolvedValueOnce({ data: { text: 'Page 3 content' } } as Tesseract.RecognizeResult);
 
-    const images = [
-      Buffer.from('image1'),
-      Buffer.from('image2'),
-      Buffer.from('image3'),
-    ];
+    const images = [Buffer.from('image1'), Buffer.from('image2'), Buffer.from('image3')];
     const result = await ocrImages(images);
 
     expect(result).toContain('--- Page 1 ---');
@@ -116,7 +112,9 @@ describe('ocrImages', () => {
     const mockRecognize = vi.mocked(Tesseract.recognize);
 
     mockRecognize.mockImplementation((_image, _lang, options) => {
-      const logger = options?.logger as ((m: { status: string; progress: number }) => void) | undefined;
+      const logger = options?.logger as
+        | ((m: { status: string; progress: number }) => void)
+        | undefined;
       if (logger) {
         logger({ status: 'loading tesseract core', progress: 0.5 }); // Should be ignored
         logger({ status: 'recognizing text', progress: 1.0 }); // Should be called
diff --git a/packages/extract/src/__tests__/pdf.test.ts b/packages/extract/src/__tests__/pdf.test.ts
index 426d30a..d42db9d 100644
--- a/packages/extract/src/__tests__/pdf.test.ts
+++ b/packages/extract/src/__tests__/pdf.test.ts
@@ -25,36 +25,39 @@ describe('pdfToImages', () => {
 
   it('should convert PDF to array of image buffers', async () => {
     const mockPdf = vi.mocked(pdf);
-    const mockPages = [
-      new Uint8Array([1, 2, 3]),
-      new Uint8Array([4, 5, 6]),
-    ];
+    const mockPages = [Buffer.from([1, 2, 3]), Buffer.from([4, 5, 6])];
 
-    // Create async iterator
+    // Create mock document with required properties
     mockPdf.mockResolvedValueOnce({
+      length: mockPages.length,
+      metadata: {} as never,
+      getPage: vi.fn(),
       [Symbol.asyncIterator]: async function* () {
         for (const page of mockPages) {
           yield page;
         }
       },
-    } as AsyncIterable<Uint8Array>);
+    });
 
     const result = await pdfToImages('/path/to/test.pdf');
 
     expect(result).toHaveLength(2);
-    expect(result![0]).toBeInstanceOf(Buffer);
-    expect(result![1]).toBeInstanceOf(Buffer);
-    expect(mockPdf).toHaveBeenCalledWith('/path/to/test.pdf', { scale: 2 });
+    expect(result?.[0]).toBeInstanceOf(Buffer);
+    expect(result?.[1]).toBeInstanceOf(Buffer);
+    expect(mockPdf).toHaveBeenCalledWith('/path/to/test.pdf', { scale: 3 });
   });
 
   it('should return null for empty PDF', async () => {
     const mockPdf = vi.mocked(pdf);
 
     mockPdf.mockResolvedValueOnce({
+      length: 0,
+      metadata: {} as never,
+      getPage: vi.fn(),
       [Symbol.asyncIterator]: async function* () {
         // No pages
       },
-    } as AsyncIterable<Uint8Array>);
+    });
 
     const result = await pdfToImages('/path/to/empty.pdf');
 
@@ -72,18 +75,20 @@ describe('pdfToImages', () => {
 
   it('should handle single page PDF', async () => {
     const mockPdf = vi.mocked(pdf);
-    const mockPage = new Uint8Array([1, 2, 3, 4, 5]);
+    const mockPage = Buffer.from([1, 2, 3, 4, 5]);
 
     mockPdf.mockResolvedValueOnce({
+      length: 1,
+      metadata: {} as never,
+      getPage: vi.fn(),
       [Symbol.asyncIterator]: async function* () {
         yield mockPage;
       },
-    } as AsyncIterable<Uint8Array>);
+    });
 
     const result = await pdfToImages('/path/to/single-page.pdf');
 
     expect(result).toHaveLength(1);
-    expect(Buffer.from(result![0])).toEqual(Buffer.from(mockPage));
+    expect(result?.[0]).toEqual(mockPage);
   });
 });
-
diff --git a/packages/extract/src/__tests__/schemas.test.ts b/packages/extract/src/__tests__/schemas.test.ts
index 2980dd8..69a47f4 100644
--- a/packages/extract/src/__tests__/schemas.test.ts
+++ b/packages/extract/src/__tests__/schemas.test.ts
@@ -6,15 +6,15 @@ describe('LineItemSchema', () => {
     const result = LineItemSchema.parse({
       description: 'Coffee',
       quantity: 2,
-      unitPrice: 3.50,
-      total: 7.00,
+      unitPrice: 3.5,
+      total: 7.0,
     });
 
     expect(result).toEqual({
       description: 'Coffee',
       quantity: 2,
-      unitPrice: 3.50,
-      total: 7.00,
+      unitPrice: 3.5,
+      total: 7.0,
     });
   });
 
@@ -27,14 +27,14 @@ describe('LineItemSchema', () => {
     });
 
     expect(result.quantity).toBe(2);
-    expect(result.unitPrice).toBe(3.50);
-    expect(result.total).toBe(7.00);
+    expect(result.unitPrice).toBe(3.5);
+    expect(result.total).toBe(7.0);
   });
 
   it('should normalize price to total', () => {
     const result = LineItemSchema.parse({
-      description: 'Item',
       price: 9.99, // Some models output "price" instead of "total"
+      name: 'Item',
     });
 
     expect(result.total).toBe(9.99);
@@ -43,11 +43,11 @@ describe('LineItemSchema', () => {
   it('should prefer total over price when both present', () => {
     const result = LineItemSchema.parse({
       description: 'Item',
-      total: 10.00,
-      price: 5.00,
+      total: 10.0,
+      price: 5.0,
     });
 
-    expect(result.total).toBe(10.00);
+    expect(result.total).toBe(10.0);
   });
 
   it('should handle missing optional fields', () => {
@@ -62,6 +62,42 @@ describe('LineItemSchema', () => {
       total: undefined,
     });
   });
+
+  it('should normalize name to description', () => {
+    const result = LineItemSchema.parse({
+      name: 'Product Name',
+      price: 5.99,
+    });
+
+    expect(result.description).toBe('Product Name');
+  });
+
+  it('should normalize item to description', () => {
+    const result = LineItemSchema.parse({
+      item: 'Line Item',
+      amount: 10.0,
+    });
+
+    expect(result.description).toBe('Line Item');
+    expect(result.total).toBe(10.0);
+  });
+
+  it('should default description to Unknown item when missing', () => {
+    const result = LineItemSchema.parse({
+      total: 5.0,
+    });
+
+    expect(result.description).toBe('Unknown item');
+  });
+
+  it('should normalize qty to quantity', () => {
+    const result = LineItemSchema.parse({
+      description: 'Item',
+      qty: 3,
+    });
+
+    expect(result.quantity).toBe(3);
+  });
 });
 
 describe('DocumentDataSchema', () => {
@@ -120,6 +156,15 @@ describe('DocumentDataSchema', () => {
     expect(result.amount).toBe(99.99);
   });
 
+  it('should preserve negative amounts (refunds/credits)', () => {
+    const result = DocumentDataSchema.parse({
+      type: 'receipt',
+      amount: -50.0,
+    });
+
+    expect(result.amount).toBe(-50.0);
+  });
+
   it('should parse all valid document types', () => {
     const types = ['invoice', 'receipt', 'bank_statement', 'other'] as const;
 
@@ -129,4 +174,3 @@ describe('DocumentDataSchema', () => {
     }
   });
 });
-
diff --git a/packages/extract/src/index.ts b/packages/extract/src/index.ts
index 1dfa3fc..32a5f4f 100644
--- a/packages/extract/src/index.ts
+++ b/packages/extract/src/index.ts
@@ -7,15 +7,18 @@
 
 // Main extraction function
 export { extractDocument } from './extract';
-
-// Types
-export type { ExtractOptions, LogLevel, OcrProgressCallback, StreamCallback, StreamChunk } from './types';
-
 // Utilities (for testing/advanced usage)
 export { getMimeType } from './mime';
 export { ocrImages } from './ocr';
 export { pdfToImages } from './pdf';
-export { DocumentDataSchema, LineItemSchema } from './schemas';
-
 // Providers (for direct access if needed)
 export { extractWithGemini, extractWithOllama } from './providers';
+export { DocumentDataSchema, LineItemSchema } from './schemas';
+// Types
+export type {
+  ExtractOptions,
+  LogLevel,
+  OcrProgressCallback,
+  StreamCallback,
+  StreamChunk,
+} from './types';
diff --git a/packages/extract/src/mime.ts b/packages/extract/src/mime.ts
index 4b7d293..d3e6bcd 100644
--- a/packages/extract/src/mime.ts
+++ b/packages/extract/src/mime.ts
@@ -20,4 +20,3 @@ export function getMimeType(filePath: string): string {
   const ext = extname(filePath).toLowerCase();
   return MIME_TYPES[ext] || 'application/pdf';
 }
-
diff --git a/packages/extract/src/providers/gemini.ts b/packages/extract/src/providers/gemini.ts
index 7faf3db..3f06993 100644
--- a/packages/extract/src/providers/gemini.ts
+++ b/packages/extract/src/providers/gemini.ts
@@ -48,4 +48,3 @@ Only respond with valid JSON, no markdown formatting.`;
     ...extracted,
   };
 }
-
diff --git a/packages/extract/src/providers/index.ts b/packages/extract/src/providers/index.ts
index d17019d..71bd4e8 100644
--- a/packages/extract/src/providers/index.ts
+++ b/packages/extract/src/providers/index.ts
@@ -1,3 +1,2 @@
 export { extractWithGemini } from './gemini';
 export { extractWithOllama } from './ollama';
-
diff --git a/packages/extract/src/providers/ollama.ts b/packages/extract/src/providers/ollama.ts
index 571495e..5f6ece0 100644
--- a/packages/extract/src/providers/ollama.ts
+++ b/packages/extract/src/providers/ollama.ts
@@ -65,7 +65,9 @@ export async function extractWithOllama(
       });
 
       // OCR all pages in parallel for text reference
-      emitLog(onStream, 'info', `Running OCR on ${pages.length} page(s)`, { pageCount: pages.length });
+      emitLog(onStream, 'info', `Running OCR on ${pages.length} page(s)`, {
+        pageCount: pages.length,
+      });
       if (onStream) {
         onStream({ type: 'prompt', content: `Running OCR on ${pages.length} page(s)...` });
       }
@@ -221,8 +223,8 @@ General rules:
       type: (parsed as Record<string, unknown>).type,
       vendor: (parsed as Record<string, unknown>).vendor,
       amount: (parsed as Record<string, unknown>).amount,
-      itemCount: Array.isArray((parsed as Record<string, unknown>).items) 
-        ? ((parsed as Record<string, unknown>).items as unknown[]).length 
+      itemCount: Array.isArray((parsed as Record<string, unknown>).items)
+        ? ((parsed as Record<string, unknown>).items as unknown[]).length
         : 0,
     });
 
diff --git a/packages/extract/src/schemas.ts b/packages/extract/src/schemas.ts
index d45bec0..c8fdf48 100644
--- a/packages/extract/src/schemas.ts
+++ b/packages/extract/src/schemas.ts
@@ -43,14 +43,14 @@ export const LineItemSchema = z
  */
 function normalizeDate(dateStr: string | null | undefined): string | undefined {
   if (!dateStr) return undefined;
-  
+
   // Try to parse common formats
   const parsed = new Date(dateStr);
   if (!Number.isNaN(parsed.getTime())) {
     // Valid date - format as YYYY-MM-DD
     return parsed.toISOString().split('T')[0];
   }
-  
+
   // Try MM/DD/YY format (common in US receipts)
   const mdyMatch = dateStr.match(/^(\d{1,2})\/(\d{1,2})\/(\d{2,4})/);
   if (mdyMatch) {
@@ -61,7 +61,7 @@ function normalizeDate(dateStr: string | null | undefined): string | undefined {
       return normalized.toISOString().split('T')[0];
     }
   }
-  
+
   return undefined;
 }
 
@@ -98,4 +98,3 @@ export const DocumentDataSchema = z
   });
 
 export type ValidatedDocumentData = z.infer<typeof DocumentDataSchema>;
-
diff --git a/packages/extract/src/types.ts b/packages/extract/src/types.ts
index 99e5496..51de028 100644
--- a/packages/extract/src/types.ts
+++ b/packages/extract/src/types.ts
@@ -24,4 +24,3 @@ export type OcrProgressCallback = (
   progress: number,
   status: string
 ) => void;
-