feat: extract writers to shared module and add format converter CLI

SkylarKelty · claude · SkylarKelty · commit 84662e889418 · 2026-03-11T14:06:48.000Z
Move DOCX/Markdown/JSON writing logic from cli.py into
artemis/writers.py so it can be reused. Add convert.py for converting
between formats without running research (Artemis JSON → DOCX/MD,
Markdown → DOCX, etc).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
diff --git a/artemis/writers.py b/artemis/writers.py
@@ -0,0 +1,172 @@
+"""Output format writers for Artemis research results.
+
+Provides functions for writing research essays as JSON, Markdown, or DOCX.
+Used by both the research CLI (cli.py) and the standalone converter (convert.py).
+"""
+
+import json
+import re
+import sys
+from typing import Any
+
+
+def format_sources_md(results: list) -> str:
+    """Build a numbered Markdown sources list from search results.
+
+    Args:
+        results: List of objects with .title and .url attributes,
+                 or dicts with "title" and "url" keys.
+    """
+    if not results:
+        return ""
+    lines = ["\n\n---\n\n## Sources\n"]
+    for i, r in enumerate(results, 1):
+        if isinstance(r, dict):
+            title = r.get("title") or r.get("url", "")
+            url = r.get("url", "")
+        else:
+            title = r.title or r.url
+            url = r.url
+        lines.append(f"{i}. [{title}]({url})")
+    return "\n".join(lines)
+
+
+def write_json(
+    path: str,
+    query: str,
+    essay: str,
+    results: list,
+    usage: dict[str, Any] | None = None,
+    *,
+    stdout: bool = False,
+) -> None:
+    """Write results as JSON."""
+    output = {
+        "query": query,
+        "essay": essay,
+        "results": [
+            (
+                {"title": r.get("title", ""), "url": r.get("url", ""), "snippet": r.get("snippet", "")}
+                if isinstance(r, dict)
+                else {"title": r.title, "url": r.url, "snippet": r.snippet}
+            )
+            for r in results
+        ],
+        "usage": usage,
+    }
+    if stdout:
+        json.dump(output, sys.stdout, indent=2)
+        print()
+    else:
+        with open(path, "w") as f:
+            json.dump(output, f, indent=2)
+
+
+def write_markdown(
+    path: str,
+    essay: str,
+    results: list | None = None,
+    *,
+    stdout: bool = False,
+) -> None:
+    """Write essay as Markdown with optional sources appendix."""
+    content = essay
+    if results:
+        content += format_sources_md(results)
+    if stdout:
+        print(content)
+    else:
+        with open(path, "w") as f:
+            f.write(content)
+
+
+def _add_hyperlink(paragraph, url: str, text: str):
+    """Append a clickable external hyperlink showing *text* to a python-docx paragraph; return the new element."""
+    from docx.oxml.ns import qn
+    from docx.oxml import OxmlElement
+
+    part = paragraph.part
+    r_id = part.relate_to(  # register the URL as an external relationship of the paragraph's part
+        url,
+        "http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink",
+        is_external=True,
+    )
+
+    hyperlink = OxmlElement("w:hyperlink")
+    hyperlink.set(qn("r:id"), r_id)  # the element refers to the URL through the relationship id
+
+    r = OxmlElement("w:r")
+    rPr = OxmlElement("w:rPr")
+    rStyle = OxmlElement("w:rStyle")
+    rStyle.set(qn("w:val"), "Hyperlink")  # NOTE(review): assumes the document defines a "Hyperlink" style - confirm
+    rPr.append(rStyle)
+    r.append(rPr)
+
+    t = OxmlElement("w:t")
+    t.text = text
+    r.append(t)
+    hyperlink.append(r)
+    paragraph._p.append(hyperlink)  # goes after whatever the paragraph already contains
+    return hyperlink
+
+
+def md_to_docx(
+    path: str,
+    essay: str,
+    title: str | None = None,
+    results: list | None = None,
+) -> None:
+    """Convert a Markdown essay into a formatted DOCX document.
+
+    Args:
+        path: Output file path
+        essay: Markdown essay text
+        title: Optional document title (added as heading level 0)
+        results: Optional list of source results for a sources appendix.
+                 Each item should have .title/.url attrs or "title"/"url" keys.
+    """
+    from docx import Document
+    from docx.enum.text import WD_ALIGN_PARAGRAPH
+
+    doc = Document()
+
+    if title:
+        title_para = doc.add_heading(title, level=0)
+        title_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
+
+    for line in essay.split("\n"):
+        stripped = line.strip()
+        if not stripped:
+            doc.add_paragraph("")
+            continue
+
+        if stripped.startswith("#### "):
+            doc.add_heading(stripped[5:], level=4)
+        elif stripped.startswith("### "):
+            doc.add_heading(stripped[4:], level=3)
+        elif stripped.startswith("## "):
+            doc.add_heading(stripped[3:], level=2)
+        elif stripped.startswith("# "):
+            doc.add_heading(stripped[2:], level=1)
+        elif stripped.startswith("- ") or stripped.startswith("* "):
+            doc.add_paragraph(stripped[2:], style="List Bullet")
+        elif re.match(r"^\d+\.\s", stripped):
+            text = re.sub(r"^\d+\.\s", "", stripped)
+            doc.add_paragraph(text, style="List Number")
+        else:
+            doc.add_paragraph(stripped)
+
+    if results:
+        doc.add_page_break()
+        doc.add_heading("Sources", level=1)
+        for i, r in enumerate(results, 1):
+            if isinstance(r, dict):
+                r_title = r.get("title") or r.get("url", "")
+                r_url = r.get("url", "")
+            else:
+                r_title = r.title or r.url
+                r_url = r.url
+            para = doc.add_paragraph(f"{i}. ", style="List Number")
+            _add_hyperlink(para, r_url, r_title)
+
+    doc.save(path)
diff --git a/cli.py b/cli.py
@@ -18,7 +18,6 @@
 
 import asyncio
 import argparse
-import json
 import re
 import sys
 from datetime import date
@@ -27,6 +26,8 @@
 
 load_dotenv()
 
+from artemis.writers import write_json, write_markdown, md_to_docx  # noqa: E402
+
 _FORMATS = ("json", "md", "docx")
 _PRESETS = ("deep", "shallow")
 
@@ -48,126 +49,6 @@ def _default_output_path(query: str, fmt: str) -> str:
     return f"{slug}-{today}.{ext}"
 
 
-def _format_sources(results: list) -> str:
-    """Build a numbered sources list from search results."""
-    if not results:
-        return ""
-    lines = ["\n\n---\n\n## Sources\n"]
-    for i, r in enumerate(results, 1):
-        title = r.title or r.url
-        lines.append(f"{i}. [{title}]({r.url})")
-    return "\n".join(lines)
-
-
-def _write_json(path: str, query: str, result, *, stdout: bool = False) -> None:
-    """Write results as JSON."""
-    output = {
-        "query": query,
-        "essay": result.essay,
-        "results": [
-            {"title": r.title, "url": r.url, "snippet": r.snippet}
-            for r in result.results
-        ],
-        "usage": result.usage.model_dump() if result.usage else None,
-    }
-    if stdout:
-        json.dump(output, sys.stdout, indent=2)
-        print()
-    else:
-        with open(path, "w") as f:
-            json.dump(output, f, indent=2)
-
-
-def _write_markdown(path: str, query: str, result, *, stdout: bool = False) -> None:
-    """Write essay as Markdown with a sources appendix."""
-    content = result.essay + _format_sources(result.results)
-    if stdout:
-        print(content)
-    else:
-        with open(path, "w") as f:
-            f.write(content)
-
-
-def _add_hyperlink(paragraph, url: str, text: str):
-    """Add a clickable hyperlink to a python-docx paragraph."""
-    from docx.oxml.ns import qn
-    from docx.oxml import OxmlElement
-
-    part = paragraph.part
-    r_id = part.relate_to(url, "http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink", is_external=True)
-
-    hyperlink = OxmlElement("w:hyperlink")
-    hyperlink.set(qn("r:id"), r_id)
-
-    r = OxmlElement("w:r")
-    rPr = OxmlElement("w:rPr")
-    rStyle = OxmlElement("w:rStyle")
-    rStyle.set(qn("w:val"), "Hyperlink")
-    rPr.append(rStyle)
-    r.append(rPr)
-
-    t = OxmlElement("w:t")
-    t.text = text
-    r.append(t)
-    hyperlink.append(r)
-    paragraph._p.append(hyperlink)
-    return hyperlink
-
-
-def _write_docx(path: str, query: str, result, **_kwargs) -> None:
-    """Convert the markdown essay into a formatted DOCX document."""
-    from docx import Document
-    from docx.enum.text import WD_ALIGN_PARAGRAPH
-
-    doc = Document()
-
-    # Title
-    title_para = doc.add_heading(query, level=0)
-    title_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
-
-    # Parse markdown line by line
-    for line in result.essay.split("\n"):
-        stripped = line.strip()
-        if not stripped:
-            doc.add_paragraph("")
-            continue
-
-        # Headings
-        if stripped.startswith("#### "):
-            doc.add_heading(stripped[5:], level=4)
-        elif stripped.startswith("### "):
-            doc.add_heading(stripped[4:], level=3)
-        elif stripped.startswith("## "):
-            doc.add_heading(stripped[3:], level=2)
-        elif stripped.startswith("# "):
-            doc.add_heading(stripped[2:], level=1)
-        elif stripped.startswith("- ") or stripped.startswith("* "):
-            doc.add_paragraph(stripped[2:], style="List Bullet")
-        elif re.match(r"^\d+\.\s", stripped):
-            text = re.sub(r"^\d+\.\s", "", stripped)
-            doc.add_paragraph(text, style="List Number")
-        else:
-            doc.add_paragraph(stripped)
-
-    # Sources appendix
-    if result.results:
-        doc.add_page_break()
-        doc.add_heading("Sources", level=1)
-        for i, r in enumerate(result.results, 1):
-            title = r.title or r.url
-            para = doc.add_paragraph(f"{i}. ", style="List Number")
-            _add_hyperlink(para, r.url, title)
-
-    doc.save(path)
-
-
-_WRITERS = {
-    "json": _write_json,
-    "md": _write_markdown,
-    "docx": _write_docx,
-}
-
-
 def _progress_callback(quiet: bool):
     """Return a progress callback (or None if quiet)."""
     if quiet:
@@ -256,23 +137,26 @@ async def run_research(
         sys.exit(1)
 
     # Write output
-    writer = _WRITERS[fmt]
-    if stdout:
-        writer(output, query, result, stdout=True)
-    else:
-        writer(output, query, result)
-        if not quiet:
-            print(f"\n✅ Saved to {output}", file=sys.stderr)
+    usage_dict = result.usage.model_dump() if result.usage else None
+    if fmt == "json":
+        write_json(output, query, result.essay, result.results, usage_dict, stdout=stdout)
+    elif fmt == "md":
+        write_markdown(output, result.essay, result.results, stdout=stdout)
+    elif fmt == "docx":
+        md_to_docx(output, result.essay, title=query, results=result.results)
+
+    if not stdout and not quiet:
+        print(f"\n✅ Saved to {output}", file=sys.stderr)
+        print(
+            f"   {len(result.essay)} chars | {len(result.results)} sources",
+            file=sys.stderr,
+        )
+        if result.usage:
             print(
-                f"   {len(result.essay)} chars | {len(result.results)} sources",
+                f"   Tokens: {result.usage.total_tokens} "
+                f"(in={result.usage.input_tokens} out={result.usage.output_tokens})",
                 file=sys.stderr,
             )
-            if result.usage:
-                print(
-                    f"   Tokens: {result.usage.total_tokens} "
-                    f"(in={result.usage.input_tokens} out={result.usage.output_tokens})",
-                    file=sys.stderr,
-                )
 
 
 def main() -> None:
diff --git a/convert.py b/convert.py