Skip to content

Commit 84662e8

Browse files
SkylarKelty and claude committed
feat: extract writers to shared module and add format converter CLI
Move DOCX/Markdown/JSON writing logic from cli.py into artemis/writers.py so it can be reused. Add convert.py for converting between formats without running research (Artemis JSON → DOCX/MD, Markdown → DOCX, etc). Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 395d038 commit 84662e8

File tree

3 files changed

+350
-135
lines changed

3 files changed

+350
-135
lines changed

artemis/writers.py

Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,172 @@
1+
"""Output format writers for Artemis research results.
2+
3+
Provides functions for writing research essays as JSON, Markdown, or DOCX.
4+
Used by both the research CLI (cli.py) and the standalone converter (convert.py).
5+
"""
6+
7+
import json
8+
import re
9+
import sys
10+
from typing import Any
11+
12+
13+
def format_sources_md(results: list) -> str:
    """Build a numbered Markdown sources list from search results.

    Args:
        results: List of objects with .title and .url attributes,
            or dicts with "title" and "url" keys.

    Returns:
        An empty string when ``results`` is empty. Otherwise a block that
        starts with a horizontal rule and a "## Sources" heading, followed by
        one ``N. [title](url)`` line per result, numbered from 1.
    """
    if not results:
        return ""
    lines = ["\n\n---\n\n## Sources\n"]
    for i, r in enumerate(results, 1):
        # Coerce missing/None values to "" so a null URL or title is never
        # rendered as the literal text "None" inside the Markdown link.
        if isinstance(r, dict):
            url = r.get("url") or ""
            title = r.get("title") or url
        else:
            url = r.url or ""
            title = r.title or url
        lines.append(f"{i}. [{title}]({url})")
    return "\n".join(lines)
32+
33+
34+
def write_json(
35+
path: str,
36+
query: str,
37+
essay: str,
38+
results: list,
39+
usage: dict[str, Any] | None = None,
40+
*,
41+
stdout: bool = False,
42+
) -> None:
43+
"""Write results as JSON."""
44+
output = {
45+
"query": query,
46+
"essay": essay,
47+
"results": [
48+
(
49+
{"title": r.get("title", ""), "url": r.get("url", ""), "snippet": r.get("snippet", "")}
50+
if isinstance(r, dict)
51+
else {"title": r.title, "url": r.url, "snippet": r.snippet}
52+
)
53+
for r in results
54+
],
55+
"usage": usage,
56+
}
57+
if stdout:
58+
json.dump(output, sys.stdout, indent=2)
59+
print()
60+
else:
61+
with open(path, "w") as f:
62+
json.dump(output, f, indent=2)
63+
64+
65+
def write_markdown(
66+
path: str,
67+
essay: str,
68+
results: list | None = None,
69+
*,
70+
stdout: bool = False,
71+
) -> None:
72+
"""Write essay as Markdown with optional sources appendix."""
73+
content = essay
74+
if results:
75+
content += format_sources_md(results)
76+
if stdout:
77+
print(content)
78+
else:
79+
with open(path, "w") as f:
80+
f.write(content)
81+
82+
83+
def _add_hyperlink(paragraph, url: str, text: str):
    """Append a clickable external hyperlink to a python-docx paragraph.

    Args:
        paragraph: The python-docx paragraph that receives the link.
        url: Link target, registered as an external relationship on the
            paragraph's part.
        text: Visible link text.

    Returns:
        The newly created ``w:hyperlink`` element, already appended to the
        paragraph's underlying XML.
    """
    from docx.oxml import OxmlElement
    from docx.oxml.ns import qn

    # The hyperlink element refers to its target by relationship id, so the
    # URL has to be registered on the owning part first.
    rel_id = paragraph.part.relate_to(
        url,
        "http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink",
        is_external=True,
    )

    # Run properties: reference the "Hyperlink" character style by name.
    style_ref = OxmlElement("w:rStyle")
    style_ref.set(qn("w:val"), "Hyperlink")
    run_props = OxmlElement("w:rPr")
    run_props.append(style_ref)

    # The visible text of the link.
    text_node = OxmlElement("w:t")
    text_node.text = text

    # Assemble run -> hyperlink -> paragraph, properties before text.
    run = OxmlElement("w:r")
    run.append(run_props)
    run.append(text_node)

    link = OxmlElement("w:hyperlink")
    link.set(qn("r:id"), rel_id)
    link.append(run)

    paragraph._p.append(link)
    return link
111+
112+
113+
def md_to_docx(
    path: str,
    essay: str,
    title: str | None = None,
    results: list | None = None,
) -> None:
    """Convert a Markdown essay into a formatted DOCX document.

    Only line-level Markdown is interpreted: ``#`` to ``####`` headings,
    ``- ``/``* `` bullets and ``N. `` numbered items. Every other non-blank
    line becomes a plain paragraph, and blank lines become empty paragraphs.
    Inline markup is written through unchanged.

    Args:
        path: Output file path
        essay: Markdown essay text
        title: Optional document title (added as heading level 0)
        results: Optional list of source results for a sources appendix.
            Each item should have .title/.url attrs or "title"/"url" keys.
    """
    from docx import Document
    from docx.enum.text import WD_ALIGN_PARAGRAPH

    # (prefix, heading level) pairs; a line can match at most one of them
    # because each prefix ends in a space directly after its run of '#'.
    heading_prefixes = (("#### ", 4), ("### ", 3), ("## ", 2), ("# ", 1))
    # Compiled once and reused for every line of the essay.
    numbered_item = re.compile(r"^\d+\.\s")

    doc = Document()

    if title:
        title_para = doc.add_heading(title, level=0)
        title_para.alignment = WD_ALIGN_PARAGRAPH.CENTER

    for line in essay.split("\n"):
        stripped = line.strip()
        if not stripped:
            doc.add_paragraph("")
            continue

        for prefix, level in heading_prefixes:
            if stripped.startswith(prefix):
                doc.add_heading(stripped[len(prefix):], level=level)
                break
        else:
            # Not a heading: bullet, numbered item, or plain paragraph.
            if stripped.startswith(("- ", "* ")):
                doc.add_paragraph(stripped[2:], style="List Bullet")
            elif (numbered := numbered_item.match(stripped)) is not None:
                # Drop the literal "N. " marker; the list style numbers items.
                doc.add_paragraph(stripped[numbered.end():], style="List Number")
            else:
                doc.add_paragraph(stripped)

    if results:
        doc.add_page_break()
        doc.add_heading("Sources", level=1)
        for i, r in enumerate(results, 1):
            if isinstance(r, dict):
                r_url = r.get("url") or ""
                r_title = r.get("title") or r_url
            else:
                r_url = r.url or ""
                r_title = r.title or r_url
            # NOTE(review): "List Number" normally auto-numbers, so the
            # literal "{i}. " prefix may render as a second number. Confirm
            # in the generated document before changing.
            para = doc.add_paragraph(f"{i}. ", style="List Number")
            if r_url:
                _add_hyperlink(para, r_url, r_title)
            else:
                # A source without a URL has nothing to link to; emit plain
                # text rather than a relationship with an empty target.
                para.add_run(r_title)

    doc.save(path)

cli.py

Lines changed: 19 additions & 135 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818

1919
import asyncio
2020
import argparse
21-
import json
2221
import re
2322
import sys
2423
from datetime import date
@@ -27,6 +26,8 @@
2726

2827
load_dotenv()
2928

29+
from artemis.writers import write_json, write_markdown, md_to_docx # noqa: E402
30+
3031
_FORMATS = ("json", "md", "docx")
3132
_PRESETS = ("deep", "shallow")
3233

@@ -48,126 +49,6 @@ def _default_output_path(query: str, fmt: str) -> str:
4849
return f"{slug}-{today}.{ext}"
4950

5051

51-
def _format_sources(results: list) -> str:
52-
"""Build a numbered sources list from search results."""
53-
if not results:
54-
return ""
55-
lines = ["\n\n---\n\n## Sources\n"]
56-
for i, r in enumerate(results, 1):
57-
title = r.title or r.url
58-
lines.append(f"{i}. [{title}]({r.url})")
59-
return "\n".join(lines)
60-
61-
62-
def _write_json(path: str, query: str, result, *, stdout: bool = False) -> None:
63-
"""Write results as JSON."""
64-
output = {
65-
"query": query,
66-
"essay": result.essay,
67-
"results": [
68-
{"title": r.title, "url": r.url, "snippet": r.snippet}
69-
for r in result.results
70-
],
71-
"usage": result.usage.model_dump() if result.usage else None,
72-
}
73-
if stdout:
74-
json.dump(output, sys.stdout, indent=2)
75-
print()
76-
else:
77-
with open(path, "w") as f:
78-
json.dump(output, f, indent=2)
79-
80-
81-
def _write_markdown(path: str, query: str, result, *, stdout: bool = False) -> None:
82-
"""Write essay as Markdown with a sources appendix."""
83-
content = result.essay + _format_sources(result.results)
84-
if stdout:
85-
print(content)
86-
else:
87-
with open(path, "w") as f:
88-
f.write(content)
89-
90-
91-
def _add_hyperlink(paragraph, url: str, text: str):
92-
"""Add a clickable hyperlink to a python-docx paragraph."""
93-
from docx.oxml.ns import qn
94-
from docx.oxml import OxmlElement
95-
96-
part = paragraph.part
97-
r_id = part.relate_to(url, "http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink", is_external=True)
98-
99-
hyperlink = OxmlElement("w:hyperlink")
100-
hyperlink.set(qn("r:id"), r_id)
101-
102-
r = OxmlElement("w:r")
103-
rPr = OxmlElement("w:rPr")
104-
rStyle = OxmlElement("w:rStyle")
105-
rStyle.set(qn("w:val"), "Hyperlink")
106-
rPr.append(rStyle)
107-
r.append(rPr)
108-
109-
t = OxmlElement("w:t")
110-
t.text = text
111-
r.append(t)
112-
hyperlink.append(r)
113-
paragraph._p.append(hyperlink)
114-
return hyperlink
115-
116-
117-
def _write_docx(path: str, query: str, result, **_kwargs) -> None:
118-
"""Convert the markdown essay into a formatted DOCX document."""
119-
from docx import Document
120-
from docx.enum.text import WD_ALIGN_PARAGRAPH
121-
122-
doc = Document()
123-
124-
# Title
125-
title_para = doc.add_heading(query, level=0)
126-
title_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
127-
128-
# Parse markdown line by line
129-
for line in result.essay.split("\n"):
130-
stripped = line.strip()
131-
if not stripped:
132-
doc.add_paragraph("")
133-
continue
134-
135-
# Headings
136-
if stripped.startswith("#### "):
137-
doc.add_heading(stripped[5:], level=4)
138-
elif stripped.startswith("### "):
139-
doc.add_heading(stripped[4:], level=3)
140-
elif stripped.startswith("## "):
141-
doc.add_heading(stripped[3:], level=2)
142-
elif stripped.startswith("# "):
143-
doc.add_heading(stripped[2:], level=1)
144-
elif stripped.startswith("- ") or stripped.startswith("* "):
145-
doc.add_paragraph(stripped[2:], style="List Bullet")
146-
elif re.match(r"^\d+\.\s", stripped):
147-
text = re.sub(r"^\d+\.\s", "", stripped)
148-
doc.add_paragraph(text, style="List Number")
149-
else:
150-
doc.add_paragraph(stripped)
151-
152-
# Sources appendix
153-
if result.results:
154-
doc.add_page_break()
155-
doc.add_heading("Sources", level=1)
156-
for i, r in enumerate(result.results, 1):
157-
title = r.title or r.url
158-
para = doc.add_paragraph(f"{i}. ", style="List Number")
159-
_add_hyperlink(para, r.url, title)
160-
161-
doc.save(path)
162-
163-
164-
_WRITERS = {
165-
"json": _write_json,
166-
"md": _write_markdown,
167-
"docx": _write_docx,
168-
}
169-
170-
17152
def _progress_callback(quiet: bool):
17253
"""Return a progress callback (or None if quiet)."""
17354
if quiet:
@@ -256,23 +137,26 @@ async def run_research(
256137
sys.exit(1)
257138

258139
# Write output
259-
writer = _WRITERS[fmt]
260-
if stdout:
261-
writer(output, query, result, stdout=True)
262-
else:
263-
writer(output, query, result)
264-
if not quiet:
265-
print(f"\n✅ Saved to {output}", file=sys.stderr)
140+
usage_dict = result.usage.model_dump() if result.usage else None
141+
if fmt == "json":
142+
write_json(output, query, result.essay, result.results, usage_dict, stdout=stdout)
143+
elif fmt == "md":
144+
write_markdown(output, result.essay, result.results, stdout=stdout)
145+
elif fmt == "docx":
146+
md_to_docx(output, result.essay, title=query, results=result.results)
147+
148+
if not stdout and not quiet:
149+
print(f"\n✅ Saved to {output}", file=sys.stderr)
150+
print(
151+
f" {len(result.essay)} chars | {len(result.results)} sources",
152+
file=sys.stderr,
153+
)
154+
if result.usage:
266155
print(
267-
f" {len(result.essay)} chars | {len(result.results)} sources",
156+
f" Tokens: {result.usage.total_tokens} "
157+
f"(in={result.usage.input_tokens} out={result.usage.output_tokens})",
268158
file=sys.stderr,
269159
)
270-
if result.usage:
271-
print(
272-
f" Tokens: {result.usage.total_tokens} "
273-
f"(in={result.usage.input_tokens} out={result.usage.output_tokens})",
274-
file=sys.stderr,
275-
)
276160

277161

278162
def main() -> None:

0 commit comments

Comments
 (0)