Skip to content

Commit 321f380

Browse files
SkylarKeltyclaude
and committed
Overhaul CLI with format export, presets, and bug fixes (#3)
Rewrite cli.py to fix bugs and add requested features: Bug fixes: - Remove duplicate show_progress() definition (second shadowed first) - Print full essay instead of truncating at 5000 chars with no warning - Output path no longer hardcoded to 'research_output.json' New features: - --format / -f: json (default), md, docx output formats - --output / -o: custom output path, '-' for stdout, auto-generated slug+date filename when omitted (e.g. quantum-computing-2026-03-11.docx) - --preset: deep (default) or shallow, maps to existing config presets and passes all relevant settings (sub_queries, content_extraction, etc.) - --quiet / -q: suppress progress output - Progress output goes to stderr so stdout piping works cleanly DOCX export uses python-docx (added to requirements.txt) — renders headings, bullet lists, numbered lists, and a sources appendix. Co-authored-by: Claude <noreply@anthropic.com>
1 parent dff05d2 commit 321f380

File tree

2 files changed

+290
-60
lines changed

2 files changed

+290
-60
lines changed

cli.py

Lines changed: 289 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -1,85 +1,314 @@
11
#!/usr/bin/env python3
2-
"""One-shot CLI for deep research without HTTP server."""
2+
"""One-shot CLI for deep research without the HTTP server.
3+
4+
Usage examples::
5+
6+
# Basic research (outputs JSON by default)
7+
python cli.py "quantum computing advances"
8+
9+
# DOCX report with auto-generated filename
10+
python cli.py "climate change mitigation" --format docx
11+
12+
# Markdown to specific file, shallow preset
13+
python cli.py "rust vs go" --format md --output comparison.md --preset shallow
14+
15+
# Write to stdout
16+
python cli.py "LLM architectures" --format md --output -
17+
"""
318

419
import asyncio
520
import argparse
6-
import os
21+
import json
22+
import re
723
import sys
24+
from datetime import date
25+
826
from dotenv import load_dotenv
927

1028
load_dotenv()
1129

30+
_FORMATS = ("json", "md", "docx")
31+
_PRESETS = ("deep", "shallow")
32+
33+
34+
def _slugify(text: str, max_len: int = 48) -> str:
35+
"""Convert a query string into a filename-safe slug."""
36+
slug = text.lower().strip()
37+
slug = re.sub(r"[^\w\s-]", "", slug)
38+
slug = re.sub(r"[\s_]+", "-", slug)
39+
slug = re.sub(r"-+", "-", slug).strip("-")
40+
return slug[:max_len]
41+
42+
43+
def _default_output_path(query: str, fmt: str) -> str:
    """Derive an output filename of the form ``<slug>-<YYYY-MM-DD>.<ext>``.

    Raises ``KeyError`` for an unknown *fmt* (argparse normally prevents
    that from happening).
    """
    # Every supported format uses its own name as the file extension.
    extensions = dict(json="json", md="md", docx="docx")
    stamp = date.today().isoformat()
    return ".".join([f"{_slugify(query)}-{stamp}", extensions[fmt]])
49+
50+
51+
def _format_sources(results: list) -> str:
52+
"""Build a numbered sources list from search results."""
53+
if not results:
54+
return ""
55+
lines = ["\n\n---\n\n## Sources\n"]
56+
for i, r in enumerate(results, 1):
57+
title = r.title or r.url
58+
lines.append(f"{i}. [{title}]({r.url})")
59+
return "\n".join(lines)
60+
61+
62+
def _write_json(path: str, query: str, result, *, stdout: bool = False) -> None:
63+
"""Write results as JSON."""
64+
output = {
65+
"query": query,
66+
"essay": result.essay,
67+
"results": [
68+
{"title": r.title, "url": r.url, "snippet": r.snippet}
69+
for r in result.results
70+
],
71+
"usage": result.usage.model_dump() if result.usage else None,
72+
}
73+
if stdout:
74+
json.dump(output, sys.stdout, indent=2)
75+
print()
76+
else:
77+
with open(path, "w") as f:
78+
json.dump(output, f, indent=2)
79+
80+
81+
def _write_markdown(path: str, query: str, result, *, stdout: bool = False) -> None:
    """Write the essay as Markdown with a sources appendix.

    Parameters:
        path: destination file path (ignored when *stdout* is True).
        query: unused here; kept for writer-signature uniformity.
        result: run object exposing ``essay`` and ``results``.
        stdout: print to standard output instead of writing a file.
    """
    content = result.essay + _format_sources(result.results)
    if stdout:
        print(content)
    else:
        # Explicit UTF-8: essays routinely contain non-ASCII characters,
        # and the platform default encoding is not guaranteed to cope.
        with open(path, "w", encoding="utf-8") as f:
            f.write(content)
89+
90+
91+
def _write_docx(path: str, query: str, result, **_kwargs) -> None:
    """Convert the markdown essay into a formatted DOCX document.

    Supports ATX headings (levels 1-4), bullet lists ("- " / "* "),
    numbered lists ("1. "), and plain paragraphs; inline markdown such as
    **bold** is left as literal text.  A "Sources" appendix is appended
    on a new page when the run produced any results.
    """
    # Imported lazily so json/md output works without python-docx installed.
    from docx import Document
    from docx.enum.text import WD_ALIGN_PARAGRAPH

    doc = Document()

    # Title
    title_para = doc.add_heading(query, level=0)
    title_para.alignment = WD_ALIGN_PARAGRAPH.CENTER

    # Parse markdown line by line
    for line in result.essay.split("\n"):
        stripped = line.strip()
        if not stripped:
            doc.add_paragraph("")
            continue

        # Headings — deeper levels must be tested first ("#### " before "# ").
        if stripped.startswith("#### "):
            doc.add_heading(stripped[5:], level=4)
        elif stripped.startswith("### "):
            doc.add_heading(stripped[4:], level=3)
        elif stripped.startswith("## "):
            doc.add_heading(stripped[3:], level=2)
        elif stripped.startswith("# "):
            doc.add_heading(stripped[2:], level=1)
        elif stripped.startswith("- ") or stripped.startswith("* "):
            doc.add_paragraph(stripped[2:], style="List Bullet")
        elif re.match(r"^\d+\.\s", stripped):
            text = re.sub(r"^\d+\.\s", "", stripped)
            doc.add_paragraph(text, style="List Number")
        else:
            doc.add_paragraph(stripped)

    # Sources appendix
    if result.results:
        doc.add_page_break()
        doc.add_heading("Sources", level=1)
        for i, r in enumerate(result.results, 1):
            title = r.title or r.url
            # Plain paragraph, NOT style="List Number": that style
            # auto-numbers, which doubled up with the manual "{i}."
            # prefix already embedded in the text ("1. 1. Title").
            doc.add_paragraph(f"{i}. {title}\n {r.url}")

    doc.save(path)
135+
136+
137+
# Dispatch table: --format value -> writer function.  All writers share
# the (path, query, result, *, stdout) calling convention.
_WRITERS = dict(
    json=_write_json,
    md=_write_markdown,
    docx=_write_docx,
)
142+
143+
144+
def _progress_callback(quiet: bool):
145+
"""Return a progress callback (or None if quiet)."""
146+
if quiet:
147+
return None
148+
icons = {
149+
"start": "🎯",
150+
"outline": "📋",
151+
"pass": "🔄",
152+
"search": "🔍",
153+
"synthesis": "✍️",
154+
"complete": "✅",
155+
}
156+
157+
def show_progress(stage: str, message: str) -> None:
158+
icon = icons.get(stage, "•")
159+
print(f"{icon} {message}", file=sys.stderr)
160+
161+
return show_progress
162+
163+
164+
async def run_research(
    query: str,
    fmt: str,
    output: str | None,
    preset: str,
    stages: int | None,
    passes: int | None,
    quiet: bool,
) -> None:
    """Run one deep-research pass and write the result in *fmt*.

    Parameters:
        query: research question to investigate.
        fmt: one of ``_FORMATS`` ("json", "md", "docx").
        output: destination path, "-" for stdout, or None to auto-generate
            a slug+date filename.
        preset: one of ``_PRESETS`` ("deep", "shallow"); supplies defaults
            for stages/passes and the remaining research settings.
        stages: explicit section count, or None to use the preset value.
        passes: explicit pass count, or None to use the preset value.
        quiet: suppress all progress/status output.

    Exits the process with status 1 on research failure or when DOCX
    output is requested on stdout.
    """
    # Imported lazily so argument parsing / --help never pays for the
    # full research stack.
    from artemis.config import get_settings
    from artemis.researcher import deep_research

    settings = get_settings()

    # Resolve preset defaults.  Compare against None explicitly: the
    # previous `stages or default` form silently replaced an explicit
    # `--stages 0` / `--passes 0` with the preset value.
    if preset == "shallow":
        if stages is None:
            stages = settings.shallow_research_stages
        if passes is None:
            passes = settings.shallow_research_passes
        sub_queries = settings.shallow_research_subqueries
        results_per_query = settings.shallow_research_results_per_query
        max_tokens = settings.shallow_research_max_tokens
        content_extraction = settings.shallow_research_content_extraction
        pages_per_section = settings.shallow_research_pages_per_section
        content_max_chars = settings.shallow_research_content_max_chars
    else:
        if stages is None:
            stages = settings.deep_research_stages
        if passes is None:
            passes = settings.deep_research_passes
        sub_queries = settings.deep_research_subqueries
        results_per_query = settings.deep_research_results_per_query
        max_tokens = settings.deep_research_max_tokens
        content_extraction = settings.deep_research_content_extraction
        pages_per_section = settings.deep_research_pages_per_section
        content_max_chars = settings.deep_research_content_max_chars

    stdout = output == "-"

    # Resolve output path
    if output is None:
        output = _default_output_path(query, fmt)
    elif stdout and fmt == "docx":
        # python-docx can only save to a path/stream of bytes, not a tty.
        print("Error: DOCX format cannot be written to stdout.", file=sys.stderr)
        sys.exit(1)

    # Progress is muted when quiet, and when piping so stdout stays clean.
    progress = _progress_callback(quiet or stdout)

    if not quiet and not stdout:
        print(f"Research: {query}", file=sys.stderr)
        print(f"Preset: {preset} | Stages: {stages} | Passes: {passes}", file=sys.stderr)
        print("-" * 50, file=sys.stderr)

    try:
        result = await deep_research(
            query=query,
            stages=stages,
            passes=passes,
            sub_queries_per_stage=sub_queries,
            results_per_query=results_per_query,
            max_tokens=max_tokens,
            content_extraction=content_extraction,
            pages_per_section=pages_per_section,
            content_max_chars=content_max_chars,
            progress_callback=progress,
        )
    except Exception as e:
        # Keep the full traceback on stderr — a one-line message makes
        # API/pipeline failures needlessly hard to diagnose.
        import traceback

        traceback.print_exc()
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)

    # Write output
    writer = _WRITERS[fmt]
    if stdout:
        writer(output, query, result, stdout=True)
    else:
        writer(output, query, result)
        if not quiet:
            print(f"\n✅ Saved to {output}", file=sys.stderr)
            print(
                f" {len(result.essay)} chars | {len(result.results)} sources",
                file=sys.stderr,
            )
            if result.usage:
                print(
                    f" Tokens: {result.usage.total_tokens} "
                    f"(in={result.usage.input_tokens} out={result.usage.output_tokens})",
                    file=sys.stderr,
                )
73249

74250

75-
def main():
76-
parser = argparse.ArgumentParser(description="One-shot deep research")
77-
parser.add_argument("query", help="Research query")
78-
parser.add_argument("--stages", type=int, default=3, help="Number of outline sections")
79-
parser.add_argument("--passes", type=int, default=2, help="Number of research passes")
251+
def main() -> None:
    """Parse command-line arguments and launch a research run."""
    examples = (
        "examples:\n"
        ' %(prog)s "quantum computing"\n'
        ' %(prog)s "climate change" --format docx\n'
        ' %(prog)s "rust vs go" --format md -o comparison.md --preset shallow\n'
        ' %(prog)s "LLM architectures" --format md -o - # stdout\n'
    )
    parser = argparse.ArgumentParser(
        description="Artemis — one-shot deep research from the command line",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=examples,
    )
    parser.add_argument("query", help="Research query or question")
    parser.add_argument(
        "--format",
        "-f",
        choices=_FORMATS,
        default="json",
        help="Output format (default: json)",
    )
    parser.add_argument(
        "--output",
        "-o",
        metavar="PATH",
        default=None,
        help='Output file path (default: auto-generated). Use "-" for stdout.',
    )
    parser.add_argument(
        "--preset",
        choices=_PRESETS,
        default="deep",
        help="Research preset (default: deep)",
    )
    # stages/passes default to None so run_research can tell "not given"
    # apart from an explicit value and apply the preset's defaults.
    parser.add_argument(
        "--stages",
        type=int,
        default=None,
        help="Number of outline sections (overrides preset)",
    )
    parser.add_argument(
        "--passes",
        type=int,
        default=None,
        help="Number of research passes (overrides preset)",
    )
    parser.add_argument(
        "--quiet",
        "-q",
        action="store_true",
        help="Suppress progress output",
    )
    ns = parser.parse_args()

    asyncio.run(
        run_research(
            query=ns.query,
            fmt=ns.format,
            output=ns.output,
            preset=ns.preset,
            stages=ns.stages,
            passes=ns.passes,
            quiet=ns.quiet,
        )
    )
83312

84313

85314
if __name__ == "__main__":

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,4 @@ pydantic>=2.5.0,<3.0.0
55
python-dotenv>=1.0.0,<2.0.0
66
trafilatura>=2.0.0,<3.0.0
77
playwright>=1.40.0,<2.0.0
8+
python-docx>=1.1.0,<2.0.0

0 commit comments

Comments
 (0)