Skip to content

Commit 0b59059

Browse files
antonsynd and claude committed
fix: preserve XML doc references in generated stdlib pages and add tests
Fix _parse_xml_doc to use inner XML content instead of text-only extraction, so <see cref>, <paramref>, and <c> tags are properly converted to markdown backtick/italic notation in generated docs. Previously these were silently dropped, leaving blank references. Add 85 pytest tests for generate_stdlib_docs.py covering name mangling (20 tests), type mapping (17), generic arg splitting (5), XML doc parsing (12), parameter parsing (7), C# file parsing (9), and markdown rendering (8). Regenerate all 35 stdlib pages with the fix. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent f65d4bf commit 0b59059

30 files changed

Lines changed: 1050 additions & 459 deletions

build_tools/generate_stdlib_docs.py

Lines changed: 14 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -227,30 +227,32 @@ def _parse_xml_doc(lines: list[str]) -> dict:
227227

228228
result: dict = {}
229229

230+
def _inner_xml(el: ElementTree.Element) -> str:
231+
"""Get inner XML content of an element, preserving child tags."""
232+
parts = [el.text or ""]
233+
for child in el:
234+
# tostring() includes the child's tail text automatically
235+
parts.append(ElementTree.tostring(child, encoding="unicode"))
236+
return "".join(parts)
237+
230238
# Summary
231239
summary_el = root.find("summary")
232240
if summary_el is not None:
233-
result["summary"] = _strip_xml_tags(
234-
ElementTree.tostring(summary_el, encoding="unicode", method="text") or ""
235-
).strip()
241+
result["summary"] = _strip_xml_tags(_inner_xml(summary_el)).strip()
236242

237243
# Parameters
238244
params = []
239245
for param_el in root.findall("param"):
240246
name = param_el.get("name", "")
241-
desc = _strip_xml_tags(
242-
ElementTree.tostring(param_el, encoding="unicode", method="text") or ""
243-
).strip()
247+
desc = _strip_xml_tags(_inner_xml(param_el)).strip()
244248
params.append((name, desc))
245249
if params:
246250
result["params"] = params
247251

248252
# Returns
249253
returns_el = root.find("returns")
250254
if returns_el is not None:
251-
result["returns"] = _strip_xml_tags(
252-
ElementTree.tostring(returns_el, encoding="unicode", method="text") or ""
253-
).strip()
255+
result["returns"] = _strip_xml_tags(_inner_xml(returns_el)).strip()
254256

255257
# Example
256258
example_el = root.find("example")
@@ -264,17 +266,13 @@ def _parse_xml_doc(lines: list[str]) -> dict:
264266
# Remarks
265267
remarks_el = root.find("remarks")
266268
if remarks_el is not None:
267-
result["remarks"] = _strip_xml_tags(
268-
ElementTree.tostring(remarks_el, encoding="unicode", method="text") or ""
269-
).strip()
269+
result["remarks"] = _strip_xml_tags(_inner_xml(remarks_el)).strip()
270270

271271
# Exceptions
272272
exceptions = []
273273
for exc_el in root.findall("exception"):
274274
cref = exc_el.get("cref", "")
275-
desc = _strip_xml_tags(
276-
ElementTree.tostring(exc_el, encoding="unicode", method="text") or ""
277-
).strip()
275+
desc = _strip_xml_tags(_inner_xml(exc_el)).strip()
278276
exceptions.append((cref, desc))
279277
if exceptions:
280278
result["exceptions"] = exceptions
@@ -283,9 +281,7 @@ def _parse_xml_doc(lines: list[str]) -> dict:
283281
typeparams = []
284282
for tp_el in root.findall("typeparam"):
285283
name = tp_el.get("name", "")
286-
desc = _strip_xml_tags(
287-
ElementTree.tostring(tp_el, encoding="unicode", method="text") or ""
288-
).strip()
284+
desc = _strip_xml_tags(_inner_xml(tp_el)).strip()
289285
typeparams.append((name, desc))
290286
if typeparams:
291287
result["typeparams"] = typeparams

0 commit comments

Comments
 (0)