Skip to content

Commit d7e4a82

Browse files
antonsynd and claude
committed
fix: correct stdlib doc generation — escaping, filtering, and accuracy
The generated stdlib documentation had multiple issues: special characters breaking markdown tables, internal implementation details leaking into public docs, duplicate properties, and inaccurate claims.

Doc generator fixes (generate_stdlib_docs.py):
- Escape |, `, \ in table cells; collapse multi-line summaries
- Fix XML tag stripping to preserve <=> in punctuation description
- Skip // comments between doc blocks and declarations (fixes the empty description for nan)
- Skip all operator declarations (implicit, explicit, true, false)
- Filter public members inside internal/private classes
- Only include files with "partial class Builtins" in builtins discovery
- Deduplicate properties by name (fixes datetime.md duplicates)
- Support multiple [SharpyModuleType] classes per file with line_range param
- Use actual C# class names for type section headings

C# source fixes:
- Move // Note comment above /// block in Math.cs so nan gets its description
- Add [SharpyModuleType] to ChainMap, Deque, Counter, OrderedDict
- Add class-level summary to Dict
- Fix sys.maxsize description (32-bit, not equivalent to Python's 2^63-1)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent cc8b4f7 commit d7e4a82

32 files changed

+373
-155
lines changed

build_tools/generate_stdlib_docs.py

Lines changed: 123 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -192,7 +192,7 @@ def _strip_xml_tags(text: str) -> str:
192192
text = re.sub(r"<see\s+cref=\"([^\"]+)\"\s*/>", r"`\1`", text)
193193
text = re.sub(r"<paramref\s+name=\"([^\"]+)\"\s*/>", r"*\1*", text)
194194
text = re.sub(r"<c>([^<]*)</c>", r"`\1`", text)
195-
text = re.sub(r"<[^>]+>", "", text)
195+
text = re.sub(r"</?[a-zA-Z][^>]*>", "", text)
196196
return text.strip()
197197

198198

@@ -382,6 +382,9 @@ def _collect_doc_lines(lines: list[str], decl_index: int) -> list[str]:
382382
if stripped.startswith("///"):
383383
doc_lines.insert(0, stripped)
384384
i -= 1
385+
elif stripped.startswith("//"):
386+
# Skip regular (non-doc) comments between doc block and declaration
387+
i -= 1
385388
elif stripped.startswith("[") or stripped.startswith("#") or stripped == "":
386389
# Skip attributes, preprocessor directives, and blank lines
387390
if doc_lines:
@@ -487,16 +490,58 @@ def parse_cs_file(
487490
filepath: Path,
488491
is_extension: bool = False,
489492
is_builtins: bool = False,
493+
line_range: tuple[int, int] | None = None,
490494
) -> list[DocMember]:
491-
"""Parse a C# file and extract documented public members."""
495+
"""Parse a C# file and extract documented public members.
496+
497+
If *line_range* is given as ``(start, end)`` (0-based inclusive), only
498+
declarations within that line range are considered.
499+
"""
492500
text = filepath.read_text(encoding="utf-8")
493501
lines = text.split("\n")
494502
members: list[DocMember] = []
503+
504+
# Pre-scan: find line ranges of internal/private classes to exclude
505+
_non_public_ranges: list[tuple[int, int]] = []
506+
_brace_depth = 0
507+
_in_non_public = False
508+
_seen_open_brace = False
509+
_non_public_start = -1
510+
for _idx, _line in enumerate(lines):
511+
_s = _line.strip()
512+
if not _in_non_public and re.match(
513+
r"(?:internal|private|protected)\s+(?:(?:sealed|abstract|static|partial)\s+)*"
514+
r"(?:class|struct)\s",
515+
_s,
516+
):
517+
_in_non_public = True
518+
_non_public_start = _idx
519+
_brace_depth = 0
520+
_seen_open_brace = False
521+
if _in_non_public:
522+
_brace_depth += _s.count("{") - _s.count("}")
523+
if "{" in _s:
524+
_seen_open_brace = True
525+
if _seen_open_brace and _brace_depth <= 0 and _non_public_start >= 0:
526+
_non_public_ranges.append((_non_public_start, _idx))
527+
_in_non_public = False
528+
_non_public_start = -1
529+
495530
i = 0
496531

497532
while i < len(lines):
498533
stripped = lines[i].strip()
499534

535+
# Skip lines outside the requested range
536+
if line_range is not None and not (line_range[0] <= i <= line_range[1]):
537+
i += 1
538+
continue
539+
540+
# Skip members inside internal/private classes
541+
if any(start <= i <= end for start, end in _non_public_ranges):
542+
i += 1
543+
continue
544+
500545
# Only process lines starting with 'public'
501546
if not stripped.startswith("public "):
502547
i += 1
@@ -529,6 +574,12 @@ def parse_cs_file(
529574
i = end_i + 1
530575
continue
531576

577+
# Skip any operator declaration (implicit, explicit, true, false, +, -, etc.)
578+
pre_paren = joined.split("(")[0] if "(" in joined else joined
579+
if " operator " in pre_paren:
580+
i = end_i + 1
581+
continue
582+
532583
# Methods (must have parentheses)
533584
method_match = _METHOD_PATTERN.match(joined)
534585
if method_match and "(" in joined:
@@ -671,19 +722,48 @@ def discover_modules(core_dir: Path) -> list[DocModule]:
671722
if file_summary:
672723
summary = file_summary
673724

674-
# Check if this is a SharpyModuleType
725+
# Check if this file contains SharpyModuleType-annotated classes
675726
file_text = cs_file.read_text(encoding="utf-8")
676-
type_match = re.search(r'\[SharpyModuleType\("([^"]+)"\)\]', file_text)
677-
if type_match:
678-
type_name = type_match.group(1)
679-
type_summary = _get_class_summary(cs_file)
680-
type_members = parse_cs_file(cs_file)
681-
all_types.append(DocType(
682-
name=type_name,
683-
cs_name=cs_file.stem,
684-
summary=type_summary,
685-
members=type_members,
686-
))
727+
type_annotations = list(re.finditer(
728+
r'\[SharpyModuleType\("([^"]+)"\)\]', file_text,
729+
))
730+
if type_annotations:
731+
# Find all annotated class names and their line positions
732+
file_lines = file_text.split("\n")
733+
annotated_classes: list[tuple[str, int, int]] = []
734+
for ta in type_annotations:
735+
after = file_text[ta.end():]
736+
cm = re.search(
737+
r"public\s+(?:sealed\s+|abstract\s+|static\s+|partial\s+)*"
738+
r"class\s+(\w+)",
739+
after,
740+
)
741+
if cm:
742+
class_name = cm.group(1)
743+
class_pos = ta.end() + cm.start()
744+
class_line = file_text[:class_pos].count("\n")
745+
annotated_classes.append((class_name, class_line, 0))
746+
747+
# Compute end lines (start of next class or EOF)
748+
for ci in range(len(annotated_classes)):
749+
name, start, _ = annotated_classes[ci]
750+
end = (annotated_classes[ci + 1][1] - 1
751+
if ci + 1 < len(annotated_classes)
752+
else len(file_lines) - 1)
753+
annotated_classes[ci] = (name, start, end)
754+
755+
# Parse each class range separately
756+
for class_name, start, end in annotated_classes:
757+
type_summary = _get_class_summary(cs_file)
758+
type_members = parse_cs_file(
759+
cs_file, line_range=(start, end),
760+
)
761+
all_types.append(DocType(
762+
name=class_name,
763+
cs_name=cs_file.stem,
764+
summary=type_summary,
765+
members=type_members,
766+
))
687767
else:
688768
members = parse_cs_file(cs_file)
689769
all_members.extend(members)
@@ -750,12 +830,15 @@ def discover_builtins(core_dir: Path) -> DocModule:
750830
members = parse_cs_file(cs_file, is_builtins=True)
751831
all_members.extend(members)
752832

753-
# Builtins/ subdirectory
833+
# Builtins/ subdirectory — only files containing partial class Builtins
754834
builtins_dir = core_dir / "Builtins"
755835
if builtins_dir.exists():
756836
for cs_file in sorted(builtins_dir.glob("*.cs")):
757837
if cs_file.name == "__Init__.cs":
758838
continue
839+
text = cs_file.read_text(encoding="utf-8")
840+
if "partial class Builtins" not in text:
841+
continue
759842
members = parse_cs_file(cs_file, is_builtins=True)
760843
all_members.extend(members)
761844

@@ -776,6 +859,19 @@ def _one_line(text: str) -> str:
776859
return " ".join(text.split()).strip()
777860

778861

862+
def _escape_table_cell(text: str) -> str:
863+
"""Escape text for use inside a markdown table cell.
864+
865+
Collapses to a single line, then escapes backslashes, pipe characters, and
866+
backticks so they don't break the table structure or inline code formatting.
867+
"""
868+
text = _one_line(text)
869+
text = text.replace("\\", "\\\\")  # backslashes first, so the escapes added below are not doubled
870+
text = text.replace("|", "\\|")
871+
text = text.replace("`", "\\`")
872+
return text
873+
874+
779875
def _render_member(member: DocMember, prefix: str = "") -> str:
780876
"""Render a single member to markdown."""
781877
lines = []
@@ -837,7 +933,7 @@ def _render_constants_table(constants: list[DocMember]) -> str:
837933
lines = ["## Constants", "", "| Name | Type | Description |",
838934
"|------|------|-------------|"]
839935
for c in constants:
840-
lines.append(f"| `{c.name}` | `{c.return_type}` | {c.summary} |")
936+
lines.append(f"| `{c.name}` | `{c.return_type}` | {_escape_table_cell(c.summary)} |")
841937
lines.append("")
842938
return "\n".join(lines)
843939

@@ -861,15 +957,21 @@ def render_module_page(module: DocModule) -> str:
861957
if constants:
862958
lines.append(_render_constants_table(constants))
863959

864-
# Properties
960+
# Properties (deduplicated by name — multiple classes may define the same property)
865961
properties = [m for m in module.members if m.kind == "property"]
866962
if properties:
963+
seen_props: set[str] = set()
964+
unique_props: list[DocMember] = []
965+
for p in properties:
966+
if p.name not in seen_props:
967+
seen_props.add(p.name)
968+
unique_props.append(p)
867969
lines.append("## Properties")
868970
lines.append("")
869971
lines.append("| Name | Type | Description |")
870972
lines.append("|------|------|-------------|")
871-
for p in properties:
872-
lines.append(f"| `{p.name}` | `{p.return_type}` | {p.summary} |")
973+
for p in unique_props:
974+
lines.append(f"| `{p.name}` | `{p.return_type}` | {_escape_table_cell(p.summary)} |")
873975
lines.append("")
874976

875977
# Methods/Functions
@@ -936,7 +1038,7 @@ def render_index_page(
9361038
lines.append("|------|-------------|")
9371039
for ct in core_types:
9381040
# Collapse multi-line summaries for table cells
939-
desc = _one_line(ct.summary)
1041+
desc = _escape_table_cell(ct.summary)
9401042
lines.append(f"| [`{ct.name}`]({ct.name}.md) | {desc} |")
9411043
lines.append("")
9421044

@@ -945,7 +1047,7 @@ def render_index_page(
9451047
lines.append("| Module | Description |")
9461048
lines.append("|--------|-------------|")
9471049
for mod in modules:
948-
desc = _one_line(mod.summary)
1050+
desc = _escape_table_cell(mod.summary)
9491051
lines.append(f"| [`{mod.name}`]({mod.name}.md) | {desc} |")
9501052
lines.append("")
9511053

0 commit comments

Comments
 (0)