Skip to content

Commit fab2172

Browse files
SkylarKelty and claude committed
fix: handle orphaned think tags and fix DOCX sources styling
- Strip orphaned </think> (no matching open) and unclosed <think> tags in llm.py, not just matched pairs
- Apply same stripping in convert.py for already-saved responses
- Fix DOCX sources: remove double-numbering from List Number style, style hyperlinks with blue underline directly instead of relying on a missing Hyperlink character style

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 84662e8 commit fab2172

File tree

3 files changed

+43
-14
lines changed

3 files changed

+43
-14
lines changed

artemis/llm.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -374,8 +374,13 @@ async def chat_completion(
374374
if not isinstance(content, str) or not content.strip():
375375
raise UpstreamServiceError("The LLM backend returned empty content.")
376376

377-
# Strip <think>...</think> reasoning blocks some models emit
378-
content = re.sub(r"<think>.*?</think>", "", content, flags=re.DOTALL).strip()
377+
# Strip <think>...</think> reasoning blocks some models emit.
378+
# Also handle orphaned tags: leading content before a lone </think>
379+
# (opening tag was outside this response) or trailing <think> without close.
380+
content = re.sub(r"<think>.*?</think>", "", content, flags=re.DOTALL)
381+
content = re.sub(r"^.*?</think>", "", content, flags=re.DOTALL)
382+
content = re.sub(r"<think>.*$", "", content, flags=re.DOTALL)
383+
content = content.strip()
379384
if not content:
380385
raise UpstreamServiceError("The LLM backend returned empty content.")
381386

artemis/writers.py

Lines changed: 23 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -81,9 +81,10 @@ def write_markdown(
8181

8282

8383
def _add_hyperlink(paragraph, url: str, text: str):
84-
"""Add a clickable hyperlink to a python-docx paragraph."""
84+
"""Add a clickable, blue-underlined hyperlink to a python-docx paragraph."""
8585
from docx.oxml.ns import qn
8686
from docx.oxml import OxmlElement
87+
from docx.shared import Pt, RGBColor
8788

8889
part = paragraph.part
8990
r_id = part.relate_to(
@@ -95,17 +96,30 @@ def _add_hyperlink(paragraph, url: str, text: str):
9596
hyperlink = OxmlElement("w:hyperlink")
9697
hyperlink.set(qn("r:id"), r_id)
9798

98-
r = OxmlElement("w:r")
99+
run = OxmlElement("w:r")
99100
rPr = OxmlElement("w:rPr")
100-
rStyle = OxmlElement("w:rStyle")
101-
rStyle.set(qn("w:val"), "Hyperlink")
102-
rPr.append(rStyle)
103-
r.append(rPr)
101+
102+
# Blue colour
103+
color = OxmlElement("w:color")
104+
color.set(qn("w:val"), "0563C1")
105+
rPr.append(color)
106+
107+
# Underline
108+
u = OxmlElement("w:u")
109+
u.set(qn("w:val"), "single")
110+
rPr.append(u)
111+
112+
# Font size matching body text
113+
sz = OxmlElement("w:sz")
114+
sz.set(qn("w:val"), str(Pt(11).pt * 2)) # half-points
115+
rPr.append(sz)
116+
117+
run.append(rPr)
104118

105119
t = OxmlElement("w:t")
106120
t.text = text
107-
r.append(t)
108-
hyperlink.append(r)
121+
run.append(t)
122+
hyperlink.append(run)
109123
paragraph._p.append(hyperlink)
110124
return hyperlink
111125

@@ -166,7 +180,7 @@ def md_to_docx(
166180
else:
167181
r_title = r.title or r.url
168182
r_url = r.url
169-
para = doc.add_paragraph(f"{i}. ", style="List Number")
183+
para = doc.add_paragraph(f"{i}. ")
170184
_add_hyperlink(para, r_url, r_title)
171185

172186
doc.save(path)

convert.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,21 @@
2525

2626
import argparse
2727
import json
28+
import re
2829
import sys
2930
from pathlib import Path
3031

3132
from artemis.writers import write_json, write_markdown, md_to_docx
3233

3334

35+
def _strip_think_tags(text: str) -> str:
36+
"""Remove <think>...</think> reasoning blocks and orphaned tags."""
37+
text = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL)
38+
text = re.sub(r"^.*?</think>", "", text, flags=re.DOTALL)
39+
text = re.sub(r"<think>.*$", "", text, flags=re.DOTALL)
40+
return text.strip()
41+
42+
3443
def _extract_from_artemis_json(data: dict) -> tuple[str, str | None, list, dict | None]:
3544
"""Extract essay, title, results, and usage from an Artemis API response.
3645
@@ -80,11 +89,12 @@ def _load_input(path: str) -> tuple[str, str | None, list, dict | None]:
8089
if isinstance(data, dict):
8190
# Artemis API response format (has "output" key)
8291
if "output" in data:
83-
return _extract_from_artemis_json(data)
92+
essay, title, results, usage = _extract_from_artemis_json(data)
93+
return _strip_think_tags(essay), title, results, usage
8494
# CLI JSON format (has "essay" key)
8595
if "essay" in data:
8696
return (
87-
data["essay"],
97+
_strip_think_tags(data["essay"]),
8898
data.get("query"),
8999
data.get("results", []),
90100
data.get("usage"),
@@ -94,7 +104,7 @@ def _load_input(path: str) -> tuple[str, str | None, list, dict | None]:
94104

95105
# Treat as raw Markdown
96106
title = input_path.stem.replace("-", " ").replace("_", " ").title()
97-
return text, title, [], None
107+
return _strip_think_tags(text), title, [], None
98108

99109

100110
def main() -> None:

0 commit comments

Comments
 (0)