fix: handle orphaned think tags and fix DOCX sources styling

SkylarKelty · claude · SkylarKelty · commit fab21728bac2 · 2026-03-11T14:17:20.000Z
- Strip orphaned </think> (no matching open) and unclosed <think> tags
  in llm.py, not just matched pairs
- Apply same stripping in convert.py for already-saved responses
- Fix DOCX sources: remove double-numbering from List Number style,
  style hyperlinks with blue underline directly instead of relying on
  a missing Hyperlink character style

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
diff --git a/artemis/llm.py b/artemis/llm.py
@@ -374,8 +374,13 @@ async def chat_completion(
     if not isinstance(content, str) or not content.strip():
         raise UpstreamServiceError("The LLM backend returned empty content.")
 
-    # Strip <think>...</think> reasoning blocks some models emit
-    content = re.sub(r"<think>.*?</think>", "", content, flags=re.DOTALL).strip()
+    # Strip <think>...</think> reasoning blocks some models emit.
+    # Also handle orphaned tags: leading content before a lone </think>
+    # (opening tag was outside this response) or trailing <think> without close.
+    content = re.sub(r"<think>.*?</think>", "", content, flags=re.DOTALL)
+    content = re.sub(r"^.*?</think>", "", content, flags=re.DOTALL)
+    content = re.sub(r"<think>.*$", "", content, flags=re.DOTALL)
+    content = content.strip()
     if not content:
         raise UpstreamServiceError("The LLM backend returned empty content.")
 
diff --git a/artemis/writers.py b/artemis/writers.py
@@ -81,9 +81,10 @@ def write_markdown(
 
 
 def _add_hyperlink(paragraph, url: str, text: str):
-    """Add a clickable hyperlink to a python-docx paragraph."""
+    """Add a clickable, blue-underlined hyperlink to a python-docx paragraph."""
     from docx.oxml.ns import qn
     from docx.oxml import OxmlElement
+    from docx.shared import Pt, RGBColor
 
     part = paragraph.part
     r_id = part.relate_to(
@@ -95,17 +96,30 @@ def _add_hyperlink(paragraph, url: str, text: str):
     hyperlink = OxmlElement("w:hyperlink")
     hyperlink.set(qn("r:id"), r_id)
 
-    r = OxmlElement("w:r")
+    run = OxmlElement("w:r")
     rPr = OxmlElement("w:rPr")
-    rStyle = OxmlElement("w:rStyle")
-    rStyle.set(qn("w:val"), "Hyperlink")
-    rPr.append(rStyle)
-    r.append(rPr)
+
+    # Blue colour
+    color = OxmlElement("w:color")
+    color.set(qn("w:val"), "0563C1")
+    rPr.append(color)
+
+    # Underline
+    u = OxmlElement("w:u")
+    u.set(qn("w:val"), "single")
+    rPr.append(u)
+
+    # Font size matching body text
+    sz = OxmlElement("w:sz")
+    sz.set(qn("w:val"), str(Pt(11).pt * 2))  # half-points
+    rPr.append(sz)
+
+    run.append(rPr)
 
     t = OxmlElement("w:t")
     t.text = text
-    r.append(t)
-    hyperlink.append(r)
+    run.append(t)
+    hyperlink.append(run)
     paragraph._p.append(hyperlink)
     return hyperlink
 
@@ -166,7 +180,7 @@ def md_to_docx(
             else:
                 r_title = r.title or r.url
                 r_url = r.url
-            para = doc.add_paragraph(f"{i}. ", style="List Number")
+            para = doc.add_paragraph(f"{i}. ")
             _add_hyperlink(para, r_url, r_title)
 
     doc.save(path)
diff --git a/convert.py b/convert.py
@@ -25,12 +25,21 @@
 
 import argparse
 import json
+import re
 import sys
 from pathlib import Path
 
 from artemis.writers import write_json, write_markdown, md_to_docx
 
 
+def _strip_think_tags(text: str) -> str:
+    """Remove <think>...</think> reasoning blocks and orphaned tags."""
+    text = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL)
+    text = re.sub(r"^.*?</think>", "", text, flags=re.DOTALL)
+    text = re.sub(r"<think>.*$", "", text, flags=re.DOTALL)
+    return text.strip()
+
+
 def _extract_from_artemis_json(data: dict) -> tuple[str, str | None, list, dict | None]:
     """Extract essay, title, results, and usage from an Artemis API response.
 
@@ -80,11 +89,12 @@ def _load_input(path: str) -> tuple[str, str | None, list, dict | None]:
         if isinstance(data, dict):
             # Artemis API response format (has "output" key)
             if "output" in data:
-                return _extract_from_artemis_json(data)
+                essay, title, results, usage = _extract_from_artemis_json(data)
+                return _strip_think_tags(essay), title, results, usage
             # CLI JSON format (has "essay" key)
             if "essay" in data:
                 return (
-                    data["essay"],
+                    _strip_think_tags(data["essay"]),
                     data.get("query"),
                     data.get("results", []),
                     data.get("usage"),
@@ -94,7 +104,7 @@ def _load_input(path: str) -> tuple[str, str | None, list, dict | None]:
 
     # Treat as raw Markdown
     title = input_path.stem.replace("-", " ").replace("_", " ").title()
-    return text, title, [], None
+    return _strip_think_tags(text), title, [], None
 
 
 def main() -> None: