Skip to content

Commit afc659f

Browse files
committed
feat: Centralize slugify utility, inline XML serialization, and consolidate load_dotenv calls.
1 parent 95351d5 commit afc659f

File tree

9 files changed

+64
-200
lines changed

9 files changed

+64
-200
lines changed

src/compendiumscribe/cli.py

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
import click
88

9-
from .compendium import Compendium
9+
from .compendium import Compendium, slugify
1010
from .create_llm_clients import (
1111
MissingAPIKeyError,
1212
create_openai_client,
@@ -22,13 +22,6 @@
2222
)
2323

2424

25-
def _generate_slug(topic: str) -> str:
26-
slug = re.sub(r"[^a-z0-9]+", "-", topic.lower()).strip("-")
27-
if not slug:
28-
slug = "compendium"
29-
return slug
30-
31-
3225

3326
@click.group()
3427
def cli() -> None:
@@ -134,7 +127,7 @@ def handle_progress(update: ResearchProgress) -> None:
134127
if output_path:
135128
base_path = output_path.parent / output_path.stem
136129
else:
137-
slug = _generate_slug(topic)
130+
slug = slugify(topic)
138131
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
139132
base_path = Path(f"{slug}_{timestamp}")
140133

@@ -229,7 +222,7 @@ def recover(input_file: Path):
229222

230223
click.echo("Research completed! Writing outputs.")
231224

232-
slug = _generate_slug(topic)
225+
slug = slugify(topic)
233226
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
234227
base_path = Path(f"{slug}_{timestamp}")
235228

src/compendiumscribe/compendium/__init__.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,7 @@
55
from .pdf import render_pdf
66
from .text_utils import (
77
format_html_text,
8-
format_plain_text,
9-
iter_markdown_links,
8+
slugify,
109
)
1110
from .xml_utils import etree_to_string
1211

@@ -17,7 +16,6 @@
1716
"Section",
1817
"render_pdf",
1918
"format_html_text",
20-
"format_plain_text",
21-
"iter_markdown_links",
19+
"slugify",
2220
"etree_to_string",
2321
]

src/compendiumscribe/compendium/compendium.py

Lines changed: 47 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from .html_site_renderer import render_html_site
1010
from .markdown_renderer import render_markdown
1111
from .payload_parser import build_from_payload
12-
from .xml_serializer import build_xml_root, render_xml_string
12+
from .xml_utils import etree_to_string
1313

1414

1515
@dataclass
@@ -27,10 +27,54 @@ class Compendium:
2727
)
2828

2929
def to_xml(self) -> ET.Element:
30-
return build_xml_root(self)
30+
"""Return an XML element representing the compendium."""
31+
root = ET.Element(
32+
"compendium",
33+
attrib={
34+
"topic": self.topic,
35+
"generated_at": self.generated_at.replace(
36+
microsecond=0
37+
).isoformat(),
38+
},
39+
)
40+
41+
overview_elem = ET.SubElement(root, "overview")
42+
overview_elem.text = self.overview
43+
44+
if self.methodology:
45+
methodology_elem = ET.SubElement(root, "methodology")
46+
for step in self.methodology:
47+
ET.SubElement(methodology_elem, "step").text = step
48+
49+
if self.sections:
50+
sections_elem = ET.SubElement(root, "sections")
51+
for section in self.sections:
52+
sections_elem.append(section.to_xml())
53+
54+
if self.open_questions:
55+
questions_elem = ET.SubElement(root, "open_questions")
56+
for question in self.open_questions:
57+
ET.SubElement(questions_elem, "question").text = question
58+
59+
if self.citations:
60+
citations_elem = ET.SubElement(root, "citations")
61+
for citation in self.citations:
62+
citations_elem.append(citation.to_xml())
63+
64+
return root
3165

3266
def to_xml_string(self) -> str:
33-
return render_xml_string(self)
67+
"""Render the compendium to a UTF-8 XML string with CDATA wrapping."""
68+
cdata_tags = {
69+
"overview",
70+
"summary",
71+
"evidence",
72+
"implications",
73+
"step",
74+
"question",
75+
"title",
76+
}
77+
return etree_to_string(self.to_xml(), cdata_tags=cdata_tags)
3478

3579
def to_markdown(self) -> str:
3680
"""Render the compendium as human-readable Markdown."""

src/compendiumscribe/compendium/html_site_renderer.py

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,21 +5,13 @@
55
import html
66
from typing import TYPE_CHECKING
77

8-
from .text_utils import format_html_text
8+
from .text_utils import format_html_text, slugify
99

1010
if TYPE_CHECKING: # pragma: no cover - hints only
1111
from .compendium import Compendium
1212
from .entities import Citation, Section
1313

1414

15-
def _slugify(text: str) -> str:
16-
"""Convert text to a URL-friendly slug."""
17-
import re
18-
19-
slug = re.sub(r"[^a-z0-9]+", "-", text.lower()).strip("-")
20-
return slug or "page"
21-
22-
2315
def _html_head(title: str, depth: int = 0) -> list[str]:
2416
"""Generate HTML head section."""
2517
return [
@@ -50,7 +42,7 @@ def _nav_links(
5042
parts.append(" <li>Sections:")
5143
parts.append(" <ul>")
5244
for section in sections:
53-
section_slug = _slugify(section.identifier)
45+
section_slug = slugify(section.identifier)
5446
href = f"{prefix}sections/{section_slug}.html"
5547
label = html.escape(section.title)
5648
parts.append(f' <li><a href="{href}">{label}</a></li>')
@@ -105,7 +97,7 @@ def _render_index_page(compendium: "Compendium") -> str:
10597
parts.append(" <h2>Sections</h2>")
10698
parts.append(" <ul>")
10799
for section in compendium.sections:
108-
section_slug = _slugify(section.identifier)
100+
section_slug = slugify(section.identifier)
109101
href = f"sections/{section_slug}.html"
110102
label = html.escape(section.title)
111103
summary = format_html_text(section.summary)
@@ -320,7 +312,7 @@ def render_html_site(compendium: "Compendium") -> dict[str, str]:
320312

321313
# Section pages
322314
for section in compendium.sections:
323-
section_slug = _slugify(section.identifier)
315+
section_slug = slugify(section.identifier)
324316
path = f"sections/{section_slug}.html"
325317
files[path] = _render_section_page(section, compendium)
326318

src/compendiumscribe/compendium/text_utils.py

Lines changed: 6 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -5,69 +5,14 @@
55
import mistune
66

77

8-
def iter_markdown_links(text: str) -> Iterator[tuple[int, int, str, str]]:
9-
"""Yield ranges and components for Markdown-style inline links."""
8+
def slugify(text: str) -> str:
9+
"""Convert text to a URL-friendly slug."""
10+
import re
1011

11-
index = 0
12-
length = len(text)
13-
while index < length:
14-
# Avoid complexity if no '['
15-
start = text.find("[", index)
16-
if start == -1:
17-
break
12+
slug = re.sub(r"[^a-z0-9]+", "-", text.lower()).strip("-")
13+
return slug or "page"
1814

19-
end_label = text.find("]", start + 1)
20-
if end_label == -1:
21-
break
22-
if end_label + 1 >= length or text[end_label + 1] != "(":
23-
index = end_label + 1
24-
continue
2515

26-
url_start = end_label + 2
27-
depth = 1
28-
position = url_start
29-
while position < length and depth > 0:
30-
char = text[position]
31-
if char == "(":
32-
depth += 1
33-
elif char == ")":
34-
depth -= 1
35-
if depth == 0:
36-
break
37-
position += 1
38-
39-
if depth != 0:
40-
break
41-
42-
url_end = position
43-
label = text[start + 1:end_label]
44-
url = text[url_start:url_end]
45-
yield start, url_end + 1, label, url
46-
index = url_end + 1
47-
48-
49-
def format_plain_text(text: str) -> str:
50-
"""Replace Markdown-style links with plain text equivalents."""
51-
52-
if not text:
53-
return text
54-
55-
segments: list[str] = []
56-
cursor = 0
57-
transformed = False
58-
for start, end, label, url in iter_markdown_links(text):
59-
segments.append(text[cursor:start])
60-
clean_url = url.strip()
61-
replacement = f"{label} ({clean_url})" if clean_url else label
62-
segments.append(replacement)
63-
cursor = end
64-
transformed = True
65-
66-
if not transformed:
67-
return text
68-
69-
segments.append(text[cursor:])
70-
return "".join(segments)
7116

7217

7318
def format_html_text(text: str | None) -> str:
@@ -93,7 +38,6 @@ def format_html_text(text: str | None) -> str:
9338

9439

9540
__all__ = [
96-
"iter_markdown_links",
97-
"format_plain_text",
41+
"slugify",
9842
"format_html_text",
9943
]

src/compendiumscribe/compendium/xml_serializer.py

Lines changed: 0 additions & 71 deletions
This file was deleted.

src/compendiumscribe/research/__init__.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -30,10 +30,7 @@
3030
)
3131
from .utils import (
3232
coerce_optional_string,
33-
first_non_empty,
3433
get_field,
35-
stringify_metadata_value,
36-
truncate_text,
3734
)
3835

3936
__all__ = [
@@ -57,8 +54,5 @@
5754
"execute_deep_research",
5855
"await_completion",
5956
"coerce_optional_string",
60-
"first_non_empty",
6157
"get_field",
62-
"stringify_metadata_value",
63-
"truncate_text",
6458
]

src/compendiumscribe/research/config.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111

1212
from .errors import MissingConfigurationError
1313

14+
load_dotenv()
15+
1416

1517
@dataclass
1618
class ResearchConfig:
@@ -42,7 +44,6 @@ class ResearchConfig:
4244

4345

4446
def _default_deep_research_model() -> str:
45-
load_dotenv()
4647
# Check specific env var first, then fallback to generic
4748
model = os.getenv("DEEP_RESEARCH_MODEL") or os.getenv("RESEARCH_MODEL")
4849
if not model:
@@ -53,7 +54,6 @@ def _default_deep_research_model() -> str:
5354

5455

5556
def _default_prompt_refiner_model() -> str:
56-
load_dotenv()
5757
model = os.getenv("PROMPT_REFINER_MODEL")
5858
if not model:
5959
raise MissingConfigurationError(

0 commit comments

Comments (0)