|
1 | 1 | #!/usr/bin/env python3 |
2 | | -"""One-shot CLI for deep research without HTTP server.""" |
| 2 | +"""One-shot CLI for deep research without the HTTP server. |
| 3 | +
|
| 4 | +Usage examples:: |
| 5 | +
|
| 6 | + # Basic research (outputs JSON by default) |
| 7 | + python cli.py "quantum computing advances" |
| 8 | +
|
| 9 | + # DOCX report with auto-generated filename |
| 10 | + python cli.py "climate change mitigation" --format docx |
| 11 | +
|
| 12 | + # Markdown to specific file, shallow preset |
| 13 | + python cli.py "rust vs go" --format md --output comparison.md --preset shallow |
| 14 | +
|
| 15 | + # Write to stdout |
| 16 | + python cli.py "LLM architectures" --format md --output - |
| 17 | +""" |
3 | 18 |
|
4 | 19 | import asyncio |
5 | 20 | import argparse |
6 | | -import os |
| 21 | +import json |
| 22 | +import re |
7 | 23 | import sys |
| 24 | +from datetime import date |
| 25 | + |
8 | 26 | from dotenv import load_dotenv |
9 | 27 |
|
10 | 28 | load_dotenv() |
11 | 29 |
|
# Supported CLI choices: output formats and research-depth presets.
_FORMATS = ("json", "md", "docx")
_PRESETS = ("deep", "shallow")
| 32 | + |
| 33 | + |
| 34 | +def _slugify(text: str, max_len: int = 48) -> str: |
| 35 | + """Convert a query string into a filename-safe slug.""" |
| 36 | + slug = text.lower().strip() |
| 37 | + slug = re.sub(r"[^\w\s-]", "", slug) |
| 38 | + slug = re.sub(r"[\s_]+", "-", slug) |
| 39 | + slug = re.sub(r"-+", "-", slug).strip("-") |
| 40 | + return slug[:max_len] |
| 41 | + |
| 42 | + |
def _default_output_path(query: str, fmt: str) -> str:
    """Build the default output filename: ``<slug>-<YYYY-MM-DD>.<fmt>``.

    Every supported format name ("json", "md", "docx") doubles as its file
    extension, so *fmt* is used directly instead of through a redundant
    identity lookup table.
    """
    return f"{_slugify(query)}-{date.today().isoformat()}.{fmt}"
| 49 | + |
| 50 | + |
| 51 | +def _format_sources(results: list) -> str: |
| 52 | + """Build a numbered sources list from search results.""" |
| 53 | + if not results: |
| 54 | + return "" |
| 55 | + lines = ["\n\n---\n\n## Sources\n"] |
| 56 | + for i, r in enumerate(results, 1): |
| 57 | + title = r.title or r.url |
| 58 | + lines.append(f"{i}. [{title}]({r.url})") |
| 59 | + return "\n".join(lines) |
| 60 | + |
| 61 | + |
| 62 | +def _write_json(path: str, query: str, result, *, stdout: bool = False) -> None: |
| 63 | + """Write results as JSON.""" |
| 64 | + output = { |
| 65 | + "query": query, |
| 66 | + "essay": result.essay, |
| 67 | + "results": [ |
| 68 | + {"title": r.title, "url": r.url, "snippet": r.snippet} |
| 69 | + for r in result.results |
| 70 | + ], |
| 71 | + "usage": result.usage.model_dump() if result.usage else None, |
| 72 | + } |
| 73 | + if stdout: |
| 74 | + json.dump(output, sys.stdout, indent=2) |
| 75 | + print() |
| 76 | + else: |
| 77 | + with open(path, "w") as f: |
| 78 | + json.dump(output, f, indent=2) |
| 79 | + |
| 80 | + |
def _write_markdown(path: str, query: str, result, *, stdout: bool = False) -> None:
    """Emit the essay as Markdown followed by the sources appendix.

    Writes to *path*, or prints to stdout when ``stdout=True``.
    """
    body = result.essay + _format_sources(result.results)
    if stdout:
        print(body)
        return
    with open(path, "w") as fh:
        fh.write(body)
| 89 | + |
| 90 | + |
def _write_docx(path: str, query: str, result, **_kwargs) -> None:
    """Render the markdown essay into a formatted DOCX document at *path*.

    Accepts (and ignores) extra keyword arguments so it matches the shared
    writer calling convention used by the other formats.
    """
    # Imported lazily so python-docx is only required for --format docx.
    from docx import Document
    from docx.enum.text import WD_ALIGN_PARAGRAPH

    doc = Document()

    # Centered document title.
    title = doc.add_heading(query, level=0)
    title.alignment = WD_ALIGN_PARAGRAPH.CENTER

    # Longest prefixes first so "#### " is never matched by "# ".
    heading_levels = (("#### ", 4), ("### ", 3), ("## ", 2), ("# ", 1))

    # Translate the markdown essay line by line.
    for raw in result.essay.split("\n"):
        text = raw.strip()
        if not text:
            doc.add_paragraph("")
            continue
        for prefix, level in heading_levels:
            if text.startswith(prefix):
                doc.add_heading(text[len(prefix):], level=level)
                break
        else:
            if text.startswith(("- ", "* ")):
                doc.add_paragraph(text[2:], style="List Bullet")
            elif re.match(r"^\d+\.\s", text):
                doc.add_paragraph(re.sub(r"^\d+\.\s", "", text), style="List Number")
            else:
                doc.add_paragraph(text)

    # Sources appendix on its own page.
    if result.results:
        doc.add_page_break()
        doc.add_heading("Sources", level=1)
        for i, item in enumerate(result.results, 1):
            doc.add_paragraph(f"{i}. {item.title or item.url}\n {item.url}", style="List Number")

    doc.save(path)
| 135 | + |
| 136 | + |
# Dispatch table mapping --format values to writer functions.  All writers
# share the (path, query, result, *, stdout) calling shape; _write_docx
# accepts and ignores the stdout keyword via **_kwargs.
_WRITERS = {
    "json": _write_json,
    "md": _write_markdown,
    "docx": _write_docx,
}
| 142 | + |
| 143 | + |
| 144 | +def _progress_callback(quiet: bool): |
| 145 | + """Return a progress callback (or None if quiet).""" |
| 146 | + if quiet: |
| 147 | + return None |
| 148 | + icons = { |
| 149 | + "start": "🎯", |
| 150 | + "outline": "📋", |
| 151 | + "pass": "🔄", |
| 152 | + "search": "🔍", |
| 153 | + "synthesis": "✍️", |
| 154 | + "complete": "✅", |
| 155 | + } |
| 156 | + |
| 157 | + def show_progress(stage: str, message: str) -> None: |
| 158 | + icon = icons.get(stage, "•") |
| 159 | + print(f"{icon} {message}", file=sys.stderr) |
| 160 | + |
| 161 | + return show_progress |
| 162 | + |
| 163 | + |
async def run_research(
    query: str,
    fmt: str,
    output: str | None,
    preset: str,
    stages: int | None,
    passes: int | None,
    quiet: bool,
) -> None:
    """Run one deep-research job and write the result in *fmt*.

    Args:
        query: Research question to investigate.
        fmt: Output format, one of ``_FORMATS``.
        output: Destination path, ``"-"`` for stdout, or ``None`` to
            auto-generate a filename from the query and today's date.
        preset: Research depth preset, one of ``_PRESETS``.
        stages: Outline-section count; ``None`` uses the preset default.
        passes: Research-pass count; ``None`` uses the preset default.
        quiet: Suppress progress and status messages (written to stderr).

    Exits the process with status 1 on research failure or when DOCX
    output is requested on stdout.
    """
    from artemis.config import get_settings
    from artemis.researcher import deep_research

    settings = get_settings()

    # Settings attributes are named "<preset>_research_<knob>"; resolve by
    # prefix instead of duplicating one assignment block per preset.
    prefix = "shallow" if preset == "shallow" else "deep"

    def _setting(knob: str):
        return getattr(settings, f"{prefix}_research_{knob}")

    # `is None` (not `or`) so an explicit --stages/--passes value of 0 is
    # honored rather than silently replaced by the preset default.
    if stages is None:
        stages = _setting("stages")
    if passes is None:
        passes = _setting("passes")
    sub_queries = _setting("subqueries")
    results_per_query = _setting("results_per_query")
    max_tokens = _setting("max_tokens")
    content_extraction = _setting("content_extraction")
    pages_per_section = _setting("pages_per_section")
    content_max_chars = _setting("content_max_chars")

    stdout = output == "-"

    # DOCX is a binary format; refuse to stream it to stdout.
    if stdout and fmt == "docx":
        print("Error: DOCX format cannot be written to stdout.", file=sys.stderr)
        sys.exit(1)

    if output is None:
        output = _default_output_path(query, fmt)

    # Progress is suppressed in stdout mode too, keeping piped output clean.
    progress = _progress_callback(quiet or stdout)

    if not quiet and not stdout:
        print(f"Research: {query}", file=sys.stderr)
        print(f"Preset: {preset} | Stages: {stages} | Passes: {passes}", file=sys.stderr)
        print("-" * 50, file=sys.stderr)

    try:
        result = await deep_research(
            query=query,
            stages=stages,
            passes=passes,
            sub_queries_per_stage=sub_queries,
            results_per_query=results_per_query,
            max_tokens=max_tokens,
            content_extraction=content_extraction,
            pages_per_section=pages_per_section,
            content_max_chars=content_max_chars,
            progress_callback=progress,
        )
    except Exception as e:
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)

    # Write output via the format dispatch table.
    writer = _WRITERS[fmt]
    if stdout:
        writer(output, query, result, stdout=True)
        return

    writer(output, query, result)
    if quiet:
        return

    print(f"\n✅ Saved to {output}", file=sys.stderr)
    print(
        f" {len(result.essay)} chars | {len(result.results)} sources",
        file=sys.stderr,
    )
    if result.usage:
        print(
            f" Tokens: {result.usage.total_tokens} "
            f"(in={result.usage.input_tokens} out={result.usage.output_tokens})",
            file=sys.stderr,
        )
73 | 249 |
|
74 | 250 |
|
def main() -> None:
    """Parse command-line arguments and run the research job to completion."""
    parser = argparse.ArgumentParser(
        description="Artemis — one-shot deep research from the command line",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=(
            "examples:\n"
            ' %(prog)s "quantum computing"\n'
            ' %(prog)s "climate change" --format docx\n'
            ' %(prog)s "rust vs go" --format md -o comparison.md --preset shallow\n'
            ' %(prog)s "LLM architectures" --format md -o - # stdout\n'
        ),
    )
    parser.add_argument("query", help="Research query or question")
    parser.add_argument("--format", "-f", choices=_FORMATS, default="json",
                        help="Output format (default: json)")
    parser.add_argument("--output", "-o", metavar="PATH", default=None,
                        help='Output file path (default: auto-generated). Use "-" for stdout.')
    parser.add_argument("--preset", choices=_PRESETS, default="deep",
                        help="Research preset (default: deep)")
    parser.add_argument("--stages", type=int, default=None,
                        help="Number of outline sections (overrides preset)")
    parser.add_argument("--passes", type=int, default=None,
                        help="Number of research passes (overrides preset)")
    parser.add_argument("--quiet", "-q", action="store_true",
                        help="Suppress progress output")

    opts = parser.parse_args()

    coro = run_research(
        query=opts.query,
        fmt=opts.format,
        output=opts.output,
        preset=opts.preset,
        stages=opts.stages,
        passes=opts.passes,
        quiet=opts.quiet,
    )
    asyncio.run(coro)
83 | 312 |
|
84 | 313 |
|
85 | 314 | if __name__ == "__main__": |
|
0 commit comments