Skip to content

Commit f7f1317

Browse files
authored
Merge pull request #11 from prosdevlab/feat/ink-cli-ux
feat: Ink CLI with rich UX and OCR preprocessing
2 parents 35eb09f + ae3d2be commit f7f1317

33 files changed

+3420
-164
lines changed

.gitignore

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -48,4 +48,10 @@ pnpm-debug.log*
4848

4949
# OS
5050
.DS_Store
51-
Thumbs.db
51+
Thumbs.db
52+
53+
# Others
54+
tmp/
55+
56+
# Tesseract.js language data (downloaded at runtime)
57+
*.traineddata

examples/tacqueria-receipt.pdf

503 KB
Binary file not shown.

packages/cli/package.json

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -23,12 +23,20 @@
2323
"@modelcontextprotocol/sdk": "^1.24.3",
2424
"chalk": "^5.6.2",
2525
"commander": "^14.0.2",
26+
"ink": "^6.5.1",
27+
"ink-spinner": "^5.0.0",
2628
"ora": "^9.0.0",
29+
"react": "^19.2.1",
2730
"vectordb": "^0.21.2",
2831
"zod": "^3.23.8"
2932
},
3033
"devDependencies": {
34+
"@testing-library/dom": "^10.4.1",
35+
"@testing-library/react": "^16.3.0",
3136
"@types/node": "^24.10.1",
37+
"@types/react": "^19.2.7",
38+
"ink-testing-library": "^4.0.0",
39+
"jsdom": "^27.2.0",
3240
"tsx": "^4.21.0",
3341
"typescript": "^5.9.3"
3442
}

packages/cli/src/cli.ts

Lines changed: 33 additions & 64 deletions
Original file line number | Diff line number | Diff line change
@@ -1,15 +1,17 @@
11
#!/usr/bin/env node
2-
import { exec } from 'node:child_process';
32
import { resolve } from 'node:path';
4-
import { promisify } from 'node:util';
5-
import type { Config } from '@doc-agent/core';
6-
import { extractDocument } from '@doc-agent/extract';
7-
import { storage } from '@doc-agent/storage';
83
import chalk from 'chalk';
94
import { Command } from 'commander';
10-
import ora from 'ora';
5+
import { render } from 'ink';
6+
import React from 'react';
7+
import { ExtractApp } from './components/ExtractApp';
118

12-
const execAsync = promisify(exec);
9+
// Base directory for user-supplied paths. INIT_CWD is set by pnpm to the
// directory the command was originally run from; when it is missing or empty
// the process working directory is used instead.
const cwd = process.env.INIT_CWD || process.cwd();

/**
 * Converts a path typed on the command line into an absolute path.
 *
 * @param filePath - Path as given by the user (relative or absolute).
 * @returns `filePath` resolved against `cwd`.
 */
function resolvePath(filePath: string): string {
  const absolute = resolve(cwd, filePath);
  return absolute;
}
1315

1416
const program = new Command();
1517

@@ -19,72 +21,39 @@ program
1921
.description('Document extraction and semantic search CLI')
2022
.version('0.1.0');
2123

22-
async function ensureOllamaModel(model: string) {
23-
const spinner = ora(`Checking for Ollama model: ${model}...`).start();
24-
try {
25-
const response = await fetch('http://localhost:11434/api/tags');
26-
if (!response.ok) {
27-
throw new Error('Ollama is not running. Please start Ollama first.');
28-
}
29-
const data = (await response.json()) as { models: { name: string }[] };
30-
const modelExists = data.models.some((m) => m.name.includes(model));
31-
32-
if (!modelExists) {
33-
spinner.text = `Pulling Ollama model: ${model} (this may take a while)...`;
34-
// Use exec to pull so we can potentially see output or just wait
35-
// Using the API to pull would be better for progress, but for now CLI is robust
36-
await execAsync(`ollama pull ${model}`);
37-
spinner.succeed(`Model ${model} ready.`);
38-
} else {
39-
spinner.succeed(`Model ${model} found.`);
40-
}
41-
} catch (error) {
42-
spinner.fail('Failed to check/pull Ollama model.');
43-
throw error;
44-
}
45-
}
46-
4724
// `extract <file>`: renders the Ink-based ExtractApp for a single document and
// waits for it to exit. Failures reported through `onError` set a non-zero
// exit code instead of terminating the process abruptly.
program
  .command('extract <file>')
  .description('Extract structured data from a document')
  .option('-p, --provider <provider>', 'AI provider (gemini|openai|ollama)', 'ollama')
  .option('-m, --model <model>', 'Model to use (ollama: llama3.2-vision, gemini: gemini-2.5-flash)')
  .option('-d, --dry-run', 'Print JSON only, do not save to database', false)
  .action(async (file: string, options) => {
    // Model used for each supported provider when --model is not given.
    const defaultModels = {
      ollama: 'llama3.2-vision',
      gemini: 'gemini-2.5-flash',
      openai: 'gpt-4o',
    } as const;
    type Provider = keyof typeof defaultModels;
    const supportedProviders = Object.keys(defaultModels);

    const isSupported = (value: unknown): value is Provider =>
      typeof value === 'string' && supportedProviders.includes(value);

    // The option parser accepts any string for --provider. Reject unsupported
    // values here rather than handing them to ExtractApp (whose `provider`
    // prop only allows the three names above) with an unrelated default model.
    const requested: unknown = options.provider;
    if (!isSupported(requested)) {
      console.error(
        chalk.red(
          `Unknown provider "${String(requested)}". Expected one of: ${supportedProviders.join(', ')}.`
        )
      );
      process.exitCode = 1;
      return;
    }

    const absolutePath = resolvePath(file);

    // `||` rather than `??` on purpose: an empty --model value also falls back
    // to the provider's default.
    const model: string = options.model || defaultModels[requested];

    const { waitUntilExit } = render(
      React.createElement(ExtractApp, {
        file: absolutePath,
        provider: requested,
        model,
        dryRun: options.dryRun,
        onComplete: () => {
          // Normal exit
        },
        onError: () => {
          // Signal failure to the shell once the app has exited.
          process.exitCode = 1;
        },
      })
    );

    await waitUntilExit();
  });
8958

9059
program
Lines changed: 58 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,58 @@
1+
import { Box, Text, useInput } from 'ink';
2+
3+
interface ConfirmInputInteractiveProps {
4+
message: string;
5+
onConfirm: (confirmed: boolean) => void;
6+
defaultValue: boolean;
7+
}
8+
9+
function ConfirmInputInteractive({
10+
message,
11+
onConfirm,
12+
defaultValue,
13+
}: ConfirmInputInteractiveProps) {
14+
useInput((input, key) => {
15+
if (input.toLowerCase() === 'y' || (key.return && defaultValue)) {
16+
onConfirm(true);
17+
} else if (input.toLowerCase() === 'n' || (key.return && !defaultValue)) {
18+
onConfirm(false);
19+
}
20+
});
21+
22+
return (
23+
<Box>
24+
<Text>{message} </Text>
25+
<Text color="gray">{defaultValue ? '[Y/n]' : '[y/N]'}</Text>
26+
</Box>
27+
);
28+
}
29+
30+
interface ConfirmInputProps {
31+
message: string;
32+
onConfirm: (confirmed: boolean) => void;
33+
defaultValue?: boolean;
34+
/** Whether stdin supports raw mode (interactive input) */
35+
isInteractive: boolean;
36+
}
37+
38+
export function ConfirmInput({
39+
message,
40+
onConfirm,
41+
defaultValue = true,
42+
isInteractive,
43+
}: ConfirmInputProps) {
44+
// Non-interactive: just show message, caller handles auto-confirm
45+
if (!isInteractive) {
46+
return (
47+
<Box>
48+
<Text>{message} </Text>
49+
<Text color="gray">(auto: {defaultValue ? 'yes' : 'no'})</Text>
50+
</Box>
51+
);
52+
}
53+
54+
// Interactive mode with useInput
55+
return (
56+
<ConfirmInputInteractive message={message} onConfirm={onConfirm} defaultValue={defaultValue} />
57+
);
58+
}
Lines changed: 32 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,32 @@
1+
import { Box, Text } from 'ink';
2+
3+
interface ErrorDisplayProps {
4+
title: string;
5+
message: string;
6+
suggestions?: string[];
7+
}
8+
9+
export function ErrorDisplay({ title, message, suggestions }: ErrorDisplayProps) {
10+
return (
11+
<Box flexDirection="column" marginTop={1}>
12+
<Box>
13+
<Text color="red" bold>
14+
{title}
15+
</Text>
16+
</Box>
17+
<Box marginLeft={2}>
18+
<Text color="gray">{message}</Text>
19+
</Box>
20+
{suggestions && suggestions.length > 0 && (
21+
<Box flexDirection="column" marginTop={1} marginLeft={2}>
22+
<Text color="yellow">Suggestions:</Text>
23+
{suggestions.map((suggestion) => (
24+
<Text key={suggestion} color="gray">
25+
{suggestion}
26+
</Text>
27+
))}
28+
</Box>
29+
)}
30+
</Box>
31+
);
32+
}
Lines changed: 91 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,91 @@
1+
import { Box, useStdin } from 'ink';
2+
import {
3+
ExtractionProvider,
4+
type ExtractionService,
5+
OllamaProvider,
6+
type OllamaService,
7+
} from '../contexts';
8+
import { useExtraction } from '../hooks/useExtraction';
9+
import { useOllama } from '../hooks/useOllama';
10+
import { ExtractionProgress } from './ExtractionProgress';
11+
import { OllamaStatus } from './OllamaStatus';
12+
import { Result } from './Result';
13+
import { StreamingOutput } from './StreamingOutput';
14+
15+
export interface ExtractAppProps {
16+
file: string;
17+
provider: 'gemini' | 'openai' | 'ollama';
18+
model: string;
19+
dryRun: boolean;
20+
onComplete: () => void;
21+
onError: (error: Error) => void;
22+
// Optional services for testing
23+
ollamaService?: OllamaService;
24+
extractionService?: ExtractionService;
25+
}
26+
27+
function ExtractAppInner({
28+
file,
29+
provider,
30+
model,
31+
dryRun,
32+
onComplete,
33+
onError,
34+
}: Omit<ExtractAppProps, 'ollamaService' | 'extractionService'>) {
35+
const { isRawModeSupported } = useStdin();
36+
37+
const ollama = useOllama({
38+
provider,
39+
model,
40+
isInteractive: isRawModeSupported,
41+
});
42+
43+
const extraction = useExtraction({
44+
file,
45+
provider,
46+
model,
47+
dryRun,
48+
shouldStart: ollama.isReady,
49+
onComplete,
50+
onError,
51+
});
52+
53+
return (
54+
<Box flexDirection="column" padding={1}>
55+
<OllamaStatus
56+
state={ollama.state}
57+
isInteractive={isRawModeSupported}
58+
onInstallConfirm={
59+
ollama.state.status === 'prompt-install' ? ollama.handleInstallConfirm : undefined
60+
}
61+
onStartConfirm={
62+
ollama.state.status === 'prompt-start' ? ollama.handleStartConfirm : undefined
63+
}
64+
/>
65+
66+
{ollama.state.status === 'ready' && (
67+
<Box marginTop={1}>
68+
<ExtractionProgress state={extraction.state} dryRun={dryRun} />
69+
</Box>
70+
)}
71+
72+
{extraction.state.status === 'extracting' && extraction.responseContent && (
73+
<StreamingOutput content={extraction.responseContent} />
74+
)}
75+
76+
{extraction.result && extraction.state.status === 'complete' && (
77+
<Result data={extraction.result} showJson={true} />
78+
)}
79+
</Box>
80+
);
81+
}
82+
83+
/**
 * Entry component for the `extract` command.
 *
 * Wraps the app body in the Ollama and extraction providers. The optional
 * `ollamaService` / `extractionService` props are handed to those providers
 * (intended for tests); all remaining props go to the inner app unchanged.
 */
export function ExtractApp(props: ExtractAppProps) {
  const { ollamaService, extractionService, ...innerProps } = props;

  return (
    <OllamaProvider service={ollamaService}>
      <ExtractionProvider service={extractionService}>
        <ExtractAppInner {...innerProps} />
      </ExtractionProvider>
    </OllamaProvider>
  );
}

0 commit comments

Comments
 (0)