Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -48,4 +48,10 @@ pnpm-debug.log*

# OS
.DS_Store
Thumbs.db
Thumbs.db

# Others
tmp/

# Tesseract.js language data (downloaded at runtime)
*.traineddata
Binary file added examples/tacqueria-receipt.pdf
Binary file not shown.
8 changes: 8 additions & 0 deletions packages/cli/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,20 @@
"@modelcontextprotocol/sdk": "^1.24.3",
"chalk": "^5.6.2",
"commander": "^14.0.2",
"ink": "^6.5.1",
"ink-spinner": "^5.0.0",
"ora": "^9.0.0",
"react": "^19.2.1",
"vectordb": "^0.21.2",
"zod": "^3.23.8"
},
"devDependencies": {
"@testing-library/dom": "^10.4.1",
"@testing-library/react": "^16.3.0",
"@types/node": "^24.10.1",
"@types/react": "^19.2.7",
"ink-testing-library": "^4.0.0",
"jsdom": "^27.2.0",
"tsx": "^4.21.0",
"typescript": "^5.9.3"
}
Expand Down
97 changes: 33 additions & 64 deletions packages/cli/src/cli.ts
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
#!/usr/bin/env node
import { exec } from 'node:child_process';
import { resolve } from 'node:path';
import { promisify } from 'node:util';
import type { Config } from '@doc-agent/core';
import { extractDocument } from '@doc-agent/extract';
import { storage } from '@doc-agent/storage';
import chalk from 'chalk';
import { Command } from 'commander';
import ora from 'ora';
import { render } from 'ink';
import React from 'react';
import { ExtractApp } from './components/ExtractApp';

const execAsync = promisify(exec);
// Resolve paths relative to where user ran the command
// INIT_CWD is set by pnpm to original working directory
const cwd = process.env.INIT_CWD || process.cwd();
function resolvePath(filePath: string): string {
return resolve(cwd, filePath);
}

const program = new Command();

Expand All @@ -19,72 +21,39 @@ program
.description('Document extraction and semantic search CLI')
.version('0.1.0');

/**
 * Makes sure the requested Ollama model is available locally.
 *
 * Asks the local Ollama HTTP API which models are installed and, when the
 * model is missing, shells out to `ollama pull` while a spinner reports
 * progress. Throws if the Ollama server is unreachable or the pull fails.
 */
async function ensureOllamaModel(model: string) {
  const spinner = ora(`Checking for Ollama model: ${model}...`).start();
  try {
    const response = await fetch('http://localhost:11434/api/tags');
    if (!response.ok) {
      throw new Error('Ollama is not running. Please start Ollama first.');
    }
    const data = (await response.json()) as { models: { name: string }[] };
    // NOTE(review): substring match — a request for "llama3" would also be
    // satisfied by an installed "llama3.2-vision"; confirm this looseness
    // is intended.
    const installed = data.models.some((m) => m.name.includes(model));

    if (installed) {
      spinner.succeed(`Model ${model} found.`);
      return;
    }

    spinner.text = `Pulling Ollama model: ${model} (this may take a while)...`;
    // Use exec to pull so we can potentially see output or just wait
    // Using the API to pull would be better for progress, but for now CLI is robust
    await execAsync(`ollama pull ${model}`);
    spinner.succeed(`Model ${model} ready.`);
  } catch (error) {
    spinner.fail('Failed to check/pull Ollama model.');
    throw error;
  }
}

program
.command('extract <file>')
.description('Extract structured data from a document')
.option('-p, --provider <provider>', 'AI provider (gemini|openai|ollama)', 'ollama')
.option(
'-m, --model <model>',
'Model to use (default: llama3.2-vision for ollama)',
'llama3.2-vision'
)
.option('-m, --model <model>', 'Model to use (ollama: llama3.2-vision, gemini: gemini-2.5-flash)')
.option('-d, --dry-run', 'Print JSON only, do not save to database', false)
.action(async (file: string, options) => {
try {
if (options.provider === 'ollama') {
await ensureOllamaModel(options.model);
}

const spinner = ora('Extracting document data...').start();

const config: Config = {
aiProvider: options.provider,
geminiApiKey: process.env.GEMINI_API_KEY,
openaiApiKey: process.env.OPENAI_API_KEY,
ollamaModel: options.model,
};
const absolutePath = resolvePath(file);

const result = await extractDocument(file, config);
// Set default model based on provider if not specified
const defaultModels: Record<string, string> = {
ollama: 'llama3.2-vision',
gemini: 'gemini-2.5-flash',
openai: 'gpt-4o',
};
const model = options.model || defaultModels[options.provider] || 'llama3.2-vision';

if (options.dryRun) {
spinner.succeed(chalk.green('Extraction complete (dry run)'));
} else {
const absolutePath = resolve(file);
await storage.saveDocument(result, absolutePath);
spinner.succeed(chalk.green(`Saved: ${result.filename} (ID: ${result.id})`));
}
const { waitUntilExit } = render(
React.createElement(ExtractApp, {
file: absolutePath,
provider: options.provider,
model,
dryRun: options.dryRun,
onComplete: () => {
// Normal exit
},
onError: () => {
process.exitCode = 1;
},
})
);

console.log(JSON.stringify(result, null, 2));
} catch (error) {
console.error(chalk.red('\nExtraction failed:'));
console.error((error as Error).message);
process.exit(1);
}
await waitUntilExit();
});

program
Expand Down
58 changes: 58 additions & 0 deletions packages/cli/src/components/ConfirmInput.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
import { Box, Text, useInput } from 'ink';

interface ConfirmInputInteractiveProps {
  message: string;
  onConfirm: (confirmed: boolean) => void;
  defaultValue: boolean;
}

/**
 * Interactive yes/no prompt. Reacts to a single keypress:
 * 'y'/'Y' confirms, 'n'/'N' declines, and Enter selects the default.
 * Any other key is ignored.
 */
function ConfirmInputInteractive(props: ConfirmInputInteractiveProps) {
  const { message, onConfirm, defaultValue } = props;

  useInput((input, key) => {
    const pressed = input.toLowerCase();
    if (pressed === 'y' || (key.return && defaultValue)) {
      onConfirm(true);
      return;
    }
    if (pressed === 'n' || (key.return && !defaultValue)) {
      onConfirm(false);
    }
  });

  // Capitalized letter in the hint marks which answer Enter picks.
  const hint = defaultValue ? '[Y/n]' : '[y/N]';

  return (
    <Box>
      <Text>{message} </Text>
      <Text color="gray">{hint}</Text>
    </Box>
  );
}

interface ConfirmInputProps {
  message: string;
  onConfirm: (confirmed: boolean) => void;
  defaultValue?: boolean;
  /** Whether stdin supports raw mode (interactive input) */
  isInteractive: boolean;
}

/**
 * Yes/no confirmation prompt. In interactive terminals it waits for a
 * keypress via ConfirmInputInteractive; otherwise it only renders the
 * message with the auto-selected answer — the caller is responsible for
 * performing the actual auto-confirm.
 */
export function ConfirmInput(props: ConfirmInputProps) {
  const { message, onConfirm, isInteractive, defaultValue = true } = props;

  if (isInteractive) {
    return (
      <ConfirmInputInteractive
        message={message}
        onConfirm={onConfirm}
        defaultValue={defaultValue}
      />
    );
  }

  // Raw mode unavailable (piped stdin, CI): display only.
  return (
    <Box>
      <Text>{message} </Text>
      <Text color="gray">(auto: {defaultValue ? 'yes' : 'no'})</Text>
    </Box>
  );
}
32 changes: 32 additions & 0 deletions packages/cli/src/components/ErrorDisplay.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import { Box, Text } from 'ink';

interface ErrorDisplayProps {
  title: string;
  message: string;
  suggestions?: string[];
}

/**
 * Failure banner: a bold red "✗ <title>" line, the error message in gray
 * underneath, and — when provided and non-empty — a bulleted list of
 * suggested next steps.
 */
export function ErrorDisplay(props: ErrorDisplayProps) {
  const { title, message, suggestions } = props;

  return (
    <Box flexDirection="column" marginTop={1}>
      <Box>
        <Text color="red" bold>
          ✗ {title}
        </Text>
      </Box>
      <Box marginLeft={2}>
        <Text color="gray">{message}</Text>
      </Box>
      {suggestions && suggestions.length > 0 ? (
        <Box flexDirection="column" marginTop={1} marginLeft={2}>
          <Text color="yellow">Suggestions:</Text>
          {suggestions.map((item) => (
            <Text key={item} color="gray">
              • {item}
            </Text>
          ))}
        </Box>
      ) : null}
    </Box>
  );
}
91 changes: 91 additions & 0 deletions packages/cli/src/components/ExtractApp.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
import { Box, useStdin } from 'ink';
import {
ExtractionProvider,
type ExtractionService,
OllamaProvider,
type OllamaService,
} from '../contexts';
import { useExtraction } from '../hooks/useExtraction';
import { useOllama } from '../hooks/useOllama';
import { ExtractionProgress } from './ExtractionProgress';
import { OllamaStatus } from './OllamaStatus';
import { Result } from './Result';
import { StreamingOutput } from './StreamingOutput';

export interface ExtractAppProps {
  file: string;
  provider: 'gemini' | 'openai' | 'ollama';
  model: string;
  dryRun: boolean;
  onComplete: () => void;
  onError: (error: Error) => void;
  // Optional services for testing
  ollamaService?: OllamaService;
  extractionService?: ExtractionService;
}

type ExtractAppInnerProps = Omit<ExtractAppProps, 'ollamaService' | 'extractionService'>;

/**
 * Orchestrates the extract flow: drives the Ollama setup state machine
 * (install/start prompts) first, then starts extraction once Ollama
 * reports ready, rendering progress, streamed model output, and the
 * final result.
 */
function ExtractAppInner(props: ExtractAppInnerProps) {
  const { file, provider, model, dryRun, onComplete, onError } = props;
  const { isRawModeSupported } = useStdin();

  const ollama = useOllama({ provider, model, isInteractive: isRawModeSupported });

  // Extraction stays idle until the Ollama setup flow signals readiness.
  const extraction = useExtraction({
    file,
    provider,
    model,
    dryRun,
    shouldStart: ollama.isReady,
    onComplete,
    onError,
  });

  const status = ollama.state.status;

  return (
    <Box flexDirection="column" padding={1}>
      <OllamaStatus
        state={ollama.state}
        isInteractive={isRawModeSupported}
        onInstallConfirm={status === 'prompt-install' ? ollama.handleInstallConfirm : undefined}
        onStartConfirm={status === 'prompt-start' ? ollama.handleStartConfirm : undefined}
      />

      {status === 'ready' ? (
        <Box marginTop={1}>
          <ExtractionProgress state={extraction.state} dryRun={dryRun} />
        </Box>
      ) : null}

      {extraction.state.status === 'extracting' && extraction.responseContent ? (
        <StreamingOutput content={extraction.responseContent} />
      ) : null}

      {extraction.result && extraction.state.status === 'complete' ? (
        <Result data={extraction.result} showJson={true} />
      ) : null}
    </Box>
  );
}

/**
 * Public entry point: wires the optional service overrides (used by
 * tests) into context providers around the main app component.
 */
export function ExtractApp(props: ExtractAppProps) {
  const { ollamaService, extractionService, ...rest } = props;
  return (
    <OllamaProvider service={ollamaService}>
      <ExtractionProvider service={extractionService}>
        <ExtractAppInner {...rest} />
      </ExtractionProvider>
    </OllamaProvider>
  );
}
Loading