Skip to content

Commit 9a769a0

Browse files
authored
DOP-1584: manpage builder (#270)
* DOP-1584: manpage builder * Incorporate Claire's feedback * Remove some dead/unnecessary code * Fix crash when a nonexisting manpage is requested
1 parent 9406b04 commit 9a769a0

File tree

7 files changed

+679
-13
lines changed

7 files changed

+679
-13
lines changed

snooty/builders/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
"""Builders are modules that ingest an AST and emit an output format, for example
HTML or troff."""

snooty/builders/man.py

Lines changed: 392 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,392 @@
1+
import io
2+
from dataclasses import dataclass, field
3+
from enum import Enum, auto
4+
from typing import Dict, Iterable, List, Union, cast
5+
6+
from .. import n
7+
from ..page import Page
8+
from ..types import FileId
9+
10+
11+
def troff_escape(value: str) -> str:
    """Escape characters in ``value`` that troff would otherwise interpret.

    Every backslash becomes ``\\e``, hyphens, apostrophes, acute accents and
    backticks are replaced with their troff escapes, and a ``.`` at the
    beginning of the text or directly after a newline is prefixed with the
    zero-width ``\\&`` so that it is not read as a macro request.

    Args:
        value: Plain text destined for a troff document.

    Returns:
        The escaped text.
    """
    # Translate in a single pass: each input character is looked at exactly
    # once, so the backslashes introduced by one replacement can never be
    # re-escaped by another, and a lone backslash is always escaped.
    table = str.maketrans(
        {
            "\\": r"\e",
            "-": r"\-",
            "'": r"\(aq",
            "´": r"\'",
            "`": r"\(ga",
        }
    )
    escaped = value.translate(table)

    # Prevent interpretation of "." at the start of any line, not only the
    # first one: text may span several lines.
    escaped = escaped.replace("\n.", "\n\\&.")
    if escaped.startswith("."):
        return r"\&" + escaped

    return escaped
29+
30+
31+
@dataclass
class ManNode:
    """Intermediate tree node sitting between the Snooty AST and troff output.

    A node carries its element kind, either a text payload or a list of child
    nodes, and a free-form string attribute mapping.
    """

    class ElementType(Enum):
        """The kinds of element a ManNode can represent."""

        MANPAGE = auto()
        SECTION = auto()
        PARAGRAPH = auto()
        URL = auto()
        STRONG = auto()
        EMPHASIS = auto()
        LIST = auto()
        LIST_ITEM = auto()
        TEXT = auto()
        INDENT = auto()
        PREFORMATTED = auto()

    element: ElementType
    # Either the literal text of this node, or its child nodes.
    children: Union[str, List["ManNode"]]
    attributes: Dict[str, str] = field(default_factory=dict)

    def to_troff(self) -> str:
        """Render the tree rooted at this node and return the troff text."""
        emitter = TroffNodeHandler()
        text_bearing = (self.ElementType.TEXT, self.ElementType.PREFORMATTED)

        def walk(current: "ManNode") -> None:
            """Emit start, content, and end events for ``current`` depth-first."""
            emitter.handle_start(current)
            payload = current.children
            if not isinstance(payload, str):
                for descendant in payload:
                    walk(descendant)
            else:
                # Only text-like elements may carry a string payload.
                assert current.element in text_bearing
                emitter.handle_text(payload)
            emitter.handle_end(current)

        walk(self)
        return emitter.output.getvalue()
73+
74+
75+
class Formatting(Enum):
    """Inline font styles the troff writer can switch between."""

    BOLD = auto()
    EMPHASIS = auto()


class TroffNodeHandler:
    """Receives start/text/end events for ManNodes and writes troff markup.

    Text is accumulated in ``text_buffer`` and moved to ``output`` whenever a
    macro is written, a node ends, or the font changes, so that markup and
    text appear in ``output`` in document order.
    """

    # troff font-selection escape for each inline style.
    _FONT_ESCAPES = {
        Formatting.BOLD: "\\fB",
        Formatting.EMPHASIS: "\\fI",
    }

    def __init__(self) -> None:
        self.text_buffer = io.StringIO()
        self.output = io.StringIO()
        self.formatting_stack: List[Formatting] = []
        self.list_stack: List[str] = []
        self.section_depth = 0

        # Set once a paragraph-like element has ended; the next one then emits
        # a paragraph macro before its content.
        self.need_paragraph_splitter = False
        # Whether the last thing written to ``output`` ended with a newline.
        self.trailing_newline = True

    def macro(self, name: str, arg: str = "") -> None:
        """Flush any pending text, and write out a troff macro expression."""
        self.flush()
        # A macro request must start at the beginning of a line.
        if not self.trailing_newline:
            self.output.write("\n")
        self.output.write(f".{name}{' ' + arg if arg else ''}\n")
        self.trailing_newline = True

    def write_raw(self, raw: str) -> None:
        """Write ``raw`` to the output unescaped; empty strings are ignored."""
        if not raw:
            return

        self.output.write(raw)
        self.trailing_newline = raw.endswith("\n")

    def flush(self) -> None:
        """Move buffered text to the output and start a fresh buffer."""
        self.write_raw(self.text_buffer.getvalue())
        self.text_buffer = io.StringIO()

    def handle_start(self, node: "ManNode") -> None:
        """Emit whatever markup opens ``node``."""
        if node.element in {
            ManNode.ElementType.PARAGRAPH,
            ManNode.ElementType.PREFORMATTED,
        }:
            if self.need_paragraph_splitter:
                # Inside a list, .IP is used as the paragraph break;
                # elsewhere .PP is used.
                if self.list_stack:
                    self.macro("IP")
                else:
                    self.macro("PP")

        if node.element is ManNode.ElementType.MANPAGE:
            self.macro("TH", f"{node.attributes['name']} {node.attributes['section']}")
        elif node.element is ManNode.ElementType.SECTION:
            self.section_depth += 1
            # The two outermost section levels use .SH; deeper ones use .SS.
            macro_name = "SH" if self.section_depth <= 2 else "SS"
            self.macro(macro_name, node.attributes["name"].upper())
        elif node.element is ManNode.ElementType.URL:
            # GNU groff has a .UR/.UE macro set for urls. They work a little
            # oddly and don't seem to do anything on some platforms, so don't use that.
            pass
        elif node.element is ManNode.ElementType.STRONG:
            self.push_formatting(Formatting.BOLD)
        elif node.element is ManNode.ElementType.EMPHASIS:
            self.push_formatting(Formatting.EMPHASIS)
        elif node.element is ManNode.ElementType.LIST:
            self.list_stack.append(node.attributes["type"])
            self.macro("RS")
        elif node.element is ManNode.ElementType.LIST_ITEM:
            assert self.list_stack
            self.need_paragraph_splitter = False
            # The list type recorded on the stack is not consulted here, so
            # every item gets a bullet; the indent is twice the nesting depth.
            self.macro("IP", f"\\(bu {len(self.list_stack) * 2}")
        elif node.element is ManNode.ElementType.INDENT:
            self.macro("RS")
        elif node.element is ManNode.ElementType.PREFORMATTED:
            self.macro("EX")

    def handle_end(self, node: "ManNode") -> None:
        """Flush pending text and emit whatever markup closes ``node``."""
        self.flush()

        if node.element in {
            ManNode.ElementType.PARAGRAPH,
            ManNode.ElementType.PREFORMATTED,
        }:
            self.need_paragraph_splitter = True

        if node.element is ManNode.ElementType.MANPAGE:
            pass
        elif node.element is ManNode.ElementType.SECTION:
            self.section_depth -= 1
        elif node.element is ManNode.ElementType.URL:
            # The link target is appended as text after the link's own content.
            self.handle_text(f" ({node.attributes['href']})")
        elif node.element is ManNode.ElementType.STRONG:
            self.pop_formatting()
        elif node.element is ManNode.ElementType.EMPHASIS:
            self.pop_formatting()
        elif node.element is ManNode.ElementType.LIST:
            self.list_stack.pop()
            self.macro("RE")
        elif node.element is ManNode.ElementType.LIST_ITEM:
            pass
        elif node.element is ManNode.ElementType.INDENT:
            self.macro("RE")
        elif node.element is ManNode.ElementType.PREFORMATTED:
            self.macro("EE")

    def handle_text(self, text: str) -> None:
        """Escape ``text`` and append it to the pending text buffer."""
        self.text_buffer.write(troff_escape(text))

    def push_formatting(self, formatting: Formatting) -> None:
        """Enter an inline style, switching font unless it is already active."""
        # Text may still be pending (e.g. the suffix a URL adds when it ends);
        # it must reach the output before the font changes.
        self.flush()

        if not self.formatting_stack or formatting is not self.formatting_stack[-1]:
            self.write_raw(self._FONT_ESCAPES[formatting])

        self.formatting_stack.append(formatting)

    def pop_formatting(self) -> None:
        """Leave the innermost inline style and restore the enclosing font."""
        self.flush()
        self.formatting_stack.pop()
        if self.formatting_stack:
            # Still inside an outer style: switch back to its font.
            self.write_raw(self._FONT_ESCAPES[self.formatting_stack[-1]])
        else:
            self.write_raw("\\f1")
198+
199+
200+
class SnootyToTroffTree:
    """Transforms snooty AST nodes to an intermediate representation of ManNodes.

    Each ``handle_<NodeClassName>`` method converts one kind of AST node into
    a (possibly empty) list of ManNodes. Handlers that return an empty list
    drop the node and everything beneath it from the output.
    """

    def handle(self, node: n.Node) -> List[ManNode]:
        """Dispatch ``node`` to the handler named after its class.

        Raises:
            AttributeError: If no ``handle_<ClassName>`` method exists.
        """
        return cast(List[ManNode], getattr(self, f"handle_{type(node).__name__}")(node))

    def children(self, nodes: Iterable[n.Node]) -> List[ManNode]:
        """Convert every node in ``nodes`` and concatenate the results."""
        list_of_lists = [self.handle(child) for child in nodes]
        return [item for sublist in list_of_lists for item in sublist]

    def handle_Code(self, node: n.Code) -> List[ManNode]:
        """Emit a preformatted node with every line of the code prefixed."""
        return [
            ManNode(
                ManNode.ElementType.PREFORMATTED,
                "\n".join(" " + line for line in node.value.split("\n")),
            )
        ]

    def handle_Section(self, node: n.Section) -> List[ManNode]:
        """Emit a section named after the first Heading child."""
        heading = next(
            (child for child in node.children if isinstance(child, n.Heading)), None
        )
        assert heading is not None, "Section without heading"
        return [
            ManNode(
                ManNode.ElementType.SECTION,
                self.children(node.children),
                {"name": heading.get_text()},
            )
        ]

    def handle_Paragraph(self, node: n.Paragraph) -> List[ManNode]:
        return [ManNode(ManNode.ElementType.PARAGRAPH, self.children(node.children))]

    def handle_Footnote(self, node: n.Footnote) -> List[ManNode]:
        return []

    def handle_FootnoteReference(self, node: n.FootnoteReference) -> List[ManNode]:
        return self.children(node.children)

    def handle_SubstitutionDefinition(
        self, node: n.SubstitutionDefinition
    ) -> List[ManNode]:
        return []

    def handle_SubstitutionReference(
        self, node: n.SubstitutionReference
    ) -> List[ManNode]:
        return self.children(node.children)

    def handle_Root(self, node: n.Root) -> List[ManNode]:
        return self.children(node.children)

    def handle_Heading(self, node: n.Heading) -> List[ManNode]:
        # The heading text is consumed by handle_Section as the section name.
        return []

    def handle_DefinitionListItem(self, node: n.DefinitionListItem) -> List[ManNode]:
        """Emit the term in bold followed by the indented definition."""
        return [
            ManNode(
                ManNode.ElementType.PARAGRAPH,
                [
                    ManNode(ManNode.ElementType.STRONG, self.children(node.term)),
                    ManNode(ManNode.ElementType.INDENT, self.children(node.children)),
                ],
            )
        ]

    def handle_DefinitionList(self, node: n.DefinitionList) -> List[ManNode]:
        return self.children(node.children)

    def handle_ListNodeItem(self, node: n.ListNodeItem) -> List[ManNode]:
        return [ManNode(ManNode.ElementType.LIST_ITEM, self.children(node.children))]

    def handle_ListNode(self, node: n.ListNode) -> List[ManNode]:
        """Emit a list node tagged as either "unordered" or "ordered"."""
        return [
            ManNode(
                ManNode.ElementType.LIST,
                self.children(node.children),
                {
                    "type": "unordered"
                    if node.enumtype == n.ListEnumType.unordered
                    else "ordered"
                },
            )
        ]

    def handle_Line(self, node: n.Line) -> List[ManNode]:
        return []

    def handle_LineBlock(self, node: n.LineBlock) -> List[ManNode]:
        return []

    def handle_Directive(self, node: n.Directive) -> List[ManNode]:
        return self.children(node.children)

    def handle_TocTreeDirectiveEntry(
        self, node: n.TocTreeDirectiveEntry
    ) -> List[ManNode]:
        return []

    def handle_TocTreeDirective(self, node: n.TocTreeDirective) -> List[ManNode]:
        return []

    def handle_DirectiveArgument(self, node: n.DirectiveArgument) -> List[ManNode]:
        return []

    def handle_Target(self, node: n.Target) -> List[ManNode]:
        """Emit the target's identifiers in bold, then its indented description.

        Targets whose children are only arguments and identifiers are dropped.
        """
        # Skip anything without a description
        if not node.children or all(
            (
                isinstance(child, (n.DirectiveArgument, n.TargetIdentifier))
                for child in node.children
            )
        ):
            return []

        # Comma-separate the identifiers. The separator is inserted *between*
        # entries, so a target with a description but no identifiers simply
        # yields no names instead of failing on an empty list.
        names: List[ManNode] = []
        for identifier in node.get_child_of_type(n.TargetIdentifier):
            if names:
                names.append(ManNode(ManNode.ElementType.TEXT, ", "))
            names.append(
                ManNode(
                    ManNode.ElementType.STRONG,
                    [ManNode(ManNode.ElementType.TEXT, identifier.get_text())],
                )
            )

        return [
            ManNode(
                ManNode.ElementType.PARAGRAPH,
                names
                + [ManNode(ManNode.ElementType.INDENT, self.children(node.children))],
            )
        ]

    def handle_TargetIdentifier(self, node: n.TargetIdentifier) -> List[ManNode]:
        # Identifiers are rendered by handle_Target itself.
        return []

    def handle_InlineTarget(self, node: n.InlineTarget) -> List[ManNode]:
        return []

    def handle_Reference(self, node: n.Reference) -> List[ManNode]:
        """Emit a URL node carrying the reference's ``refuri`` as its href."""
        return [
            ManNode(
                ManNode.ElementType.URL,
                self.children(node.children),
                {"href": node.refuri},
            )
        ]

    def handle_Role(self, node: n.Role) -> List[ManNode]:
        return self.children(node.children)

    def handle_RefRole(self, node: n.RefRole) -> List[ManNode]:
        return [ManNode(ManNode.ElementType.STRONG, self.children(node.children))]

    def handle_Text(self, node: n.Text) -> List[ManNode]:
        return [ManNode(ManNode.ElementType.TEXT, node.value)]

    def handle_Literal(self, node: n.Literal) -> List[ManNode]:
        # Literals are rendered in bold.
        return [ManNode(ManNode.ElementType.STRONG, self.children(node.children))]

    def handle_Emphasis(self, node: n.Emphasis) -> List[ManNode]:
        return [ManNode(ManNode.ElementType.EMPHASIS, self.children(node.children))]

    def handle_Field(self, node: n.Field) -> List[ManNode]:
        return []

    def handle_FieldList(self, node: n.FieldList) -> List[ManNode]:
        return []

    def handle_Strong(self, node: n.Strong) -> List[ManNode]:
        return [ManNode(ManNode.ElementType.STRONG, self.children(node.children))]

    def handle_Transition(self, node: n.Transition) -> List[ManNode]:
        return []

    def handle_Table(self, node: n.Table) -> List[ManNode]:
        return []
381+
382+
383+
def render(page: Page, name: str, title: str, section: int) -> Dict[FileId, str]:
    """Convert ``page`` to troff and return it keyed by its manpage file name.

    The returned mapping has a single entry whose key is ``<name>.<section>``
    and whose value is the rendered troff document.
    """
    converted = SnootyToTroffTree().handle(page.ast)
    manpage_attributes = {"name": name, "section": str(section), "desc": title}
    manpage = ManNode(ManNode.ElementType.MANPAGE, converted, manpage_attributes)
    troff_source = manpage.to_troff()

    output_id = FileId(f"{name}.{section}")
    return {output_id: troff_source}

0 commit comments

Comments
 (0)