|
1 | 1 | #!/usr/bin/env python3 |
2 | | -"""Check that Sphinx/ReST section underlines match title lengths.""" |
3 | | -import re |
4 | | -import sys |
| 2 | +"""Check that RST title underlines (and overlines) have the same length as the title text.""" |
| 3 | + |
| 4 | +import argparse |
5 | 5 | from pathlib import Path |
6 | 6 |
|
7 | | -# Pattern to match Sphinx section titles followed by underlines |
8 | | -# Supports: = - ` : . ' " ~ ^ _ * + # < > |
9 | | -SECTION_PATTERN = re.compile(r"^([^\n]+)\n([~=\-^`':\"#\*_\+.<>]+)\n", re.MULTILINE) |
| 7 | +RST_SECTION_CHARS = frozenset("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~") |
| 8 | + |
| 9 | + |
| 10 | +def _strip_bom(line: str) -> str: |
| 11 | + """Strip the Unicode BOM (U+FEFF) that some generators emit.""" |
| 12 | + return line.lstrip("\ufeff") |
10 | 13 |
|
11 | 14 |
|
12 | | -def fix_file(path): |
13 | | - """Fix underline length mismatches in a file.""" |
14 | | - try: |
15 | | - text = Path(path).read_text(encoding="utf-8") |
16 | | - except Exception as e: |
17 | | - print(f"Warning: Could not read {path}: {e}") |
def _is_section_line(line: str) -> bool:
    """Report whether *line* is a run of one repeated RST adornment character.

    An empty line, a line whose first character is not in
    ``RST_SECTION_CHARS``, and a line mixing several different characters
    are all rejected.
    """
    if line == "":
        return False
    first = line[0]
    # Every character equals the first one exactly when the count of that
    # character matches the line length.
    return first in RST_SECTION_CHARS and line.count(first) == len(line)
19 | 22 |
|
20 | | - original_text = text |
21 | | - fixed_count = 0 |
22 | 23 |
|
23 | | - def replace_underline(match): |
24 | | - nonlocal fixed_count |
25 | | - title, underline = match.groups() |
26 | | - title_stripped = title.strip() |
27 | | - underline_stripped = underline.strip() |
def _is_title_candidate(line: str) -> bool:
    """Report whether *line* may be the text of an RST section title.

    Rejected are: empty lines, lines starting with a space, adornment lines
    (as decided by ``_is_section_line``), and lines starting with ``>>>`` or
    ``...``.
    """
    # line[:1] is "" for an empty line, otherwise its first character.
    if line[:1] in ("", " "):
        return False
    return not (_is_section_line(line) or line.startswith((">>>", "...")))
28 | 33 |
|
29 | | - # Skip if title is empty or looks like it might be a code block or other content |
30 | | - if not title_stripped or title_stripped.startswith(".."): |
31 | | - return match.group(0) |
32 | 34 |
|
33 | | - if len(title_stripped) != len(underline_stripped): |
34 | | - # Get the underline character and create correct length underline |
35 | | - underline_char = underline_stripped[0] |
36 | | - correct_underline = underline_char * len(title_stripped) |
37 | | - fixed_count += 1 |
38 | | - return f"{title}\n{correct_underline}\n" |
| 35 | +def _visible_len(line: str) -> int: |
| 36 | + """Length ignoring BOM characters.""" |
| 37 | + return len(_strip_bom(line)) |
39 | 38 |
|
40 | | - return match.group(0) |
41 | 39 |
|
42 | | - text = SECTION_PATTERN.sub(replace_underline, text) |
def _scan(lines: list[str]) -> list[tuple[int, str, int, int, bool]]:
    """Find title adornment lines whose length differs from their title.

    Args:
        lines: File content split into lines, line terminators already removed.

    Returns:
        One ``(marker_idx, title, title_len, marker_len, is_overline)`` tuple
        per mismatching adornment line, in file order. ``marker_idx`` is the
        0-based index of the adornment line in *lines*; ``title`` is the title
        text with any leading BOM stripped; ``title_len`` and ``marker_len``
        are plain ``len()`` values; ``is_overline`` is True for an overline
        and False for an underline.
    """
    n = len(lines)
    errors: list[tuple[int, str, int, int, bool]] = []

    i = 0
    while i < n:
        line = lines[i]

        if not _is_section_line(line):
            i += 1
            continue

        # --- title (previous line) + underline ---
        # This must be tried first: when a title line directly precedes the
        # adornment, the adornment is that title's underline, even if the
        # following two lines would also look like "title + underline"
        # (adjacent sections written without a blank line between them).
        if i > 0:
            prev = _strip_bom(lines[i - 1])
            if _is_title_candidate(prev):
                title_len = len(prev)
                if len(line) != title_len:
                    errors.append((i, prev, title_len, len(line), False))
                i += 1
                continue

        # --- overline + title + underline ---
        # Both adornment lines must use the same character.
        if i + 2 < n:
            title = _strip_bom(lines[i + 1])
            underline = lines[i + 2]
            if (
                _is_title_candidate(title)
                and _is_section_line(underline)
                and underline[0] == line[0]
            ):
                title_len = len(title)
                if len(line) != title_len:
                    errors.append((i, title, title_len, len(line), True))
                if len(underline) != title_len:
                    errors.append((i + 2, title, title_len, len(underline), False))
                # Skip past the whole three-line construct.
                i += 3
                continue

        i += 1

    return errors
65 | 84 |
|
66 | | - # Skip if title is empty or looks like it might be a code block or other content |
67 | | - if not title_stripped or title_stripped.startswith(".."): |
68 | | - continue |
69 | 85 |
|
70 | | - if len(title_stripped) != len(underline_stripped): |
71 | | - # Calculate line number |
72 | | - line_num = text.count("\n", 0, match.start()) + 1 |
73 | | - errors.append( |
74 | | - f"{path}:{line_num}: " |
75 | | - f"title '{title_stripped}' length {len(title_stripped)}, " |
76 | | - f"underline length {len(underline_stripped)}" |
77 | | - ) |
def fix_file(filepath: str) -> list[tuple[int, str, int, int]]:
    """Rewrite mismatching adornment lines of *filepath* in place.

    Each adornment line reported by ``_scan`` is replaced by its first
    character repeated ``title_len`` times. The file is read and written as
    UTF-8 with newline translation disabled, so every line (fixed or not)
    keeps its original terminator, and an unterminated last line stays
    unterminated. The file is only rewritten when there is something to fix.

    Args:
        filepath: Path of the file to fix.

    Returns:
        One ``(lineno, title, title_len, marker_len)`` tuple per corrected
        line, where ``lineno`` is 1-based and ``marker_len`` is the length the
        adornment had before the fix.
    """
    # newline="" keeps "\n", "\r\n" and "\r" untranslated on read.
    with open(filepath, encoding="utf-8", newline="") as fh:
        raw_lines = fh.readlines()

    lines = [raw.rstrip("\n\r") for raw in raw_lines]
    mismatches = _scan(lines)
    if not mismatches:
        return []

    reported: list[tuple[int, str, int, int]] = []
    for marker_idx, title, title_len, marker_len, _is_over in mismatches:
        reported.append((marker_idx + 1, title, title_len, marker_len))
        content = lines[marker_idx]
        # Whatever followed the content on the raw line is its terminator
        # (possibly empty for the last line of the file).
        terminator = raw_lines[marker_idx][len(content):]
        raw_lines[marker_idx] = content[0] * title_len + terminator

    # newline="" again so the preserved terminators are written verbatim.
    with open(filepath, "w", encoding="utf-8", newline="") as fh:
        fh.writelines(raw_lines)

    return reported
78 | 109 |
|
79 | | - return errors |
80 | 110 |
|
def check_file(filepath: str) -> list[tuple[int, str, int, int]]:
    """Return every title/adornment length mismatch found in *filepath*.

    The file is read as UTF-8 (not the platform default encoding) and is
    never modified.

    Args:
        filepath: Path of the file to check.

    Returns:
        One ``(lineno, title, title_len, marker_len)`` tuple per mismatching
        adornment line, where ``lineno`` is the 1-based line number of the
        adornment line.
    """
    with open(filepath, encoding="utf-8") as fh:
        lines = [raw.rstrip("\n\r") for raw in fh]

    return [
        (marker_idx + 1, title, title_len, marker_len)
        for marker_idx, title, title_len, marker_len, _ in _scan(lines)
    ]
| 121 | + |
| 122 | + |
def main() -> int:
    """Run the checker from the command line and return the exit status.

    Positional arguments name the files to process; when none are given,
    every ``*.rst`` file found recursively under ``docs`` is used. With
    ``--fix`` the files are corrected in place, otherwise they are only
    checked. One line is printed per mismatch.

    Returns:
        1 if at least one mismatch was found (or fixed), otherwise 0.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "files", nargs="*", help="RST files to check (default: all docs/**/*.rst)"
    )
    parser.add_argument("--fix", action="store_true", help="Fix underlines in-place")
    options = parser.parse_args()

    # The mode is fixed for the whole run, so pick handler and wording once.
    if options.fix:
        process, verb = fix_file, "fixed"
    else:
        process, verb = check_file, "found"

    targets = options.files
    if not targets:
        targets = [str(found) for found in Path("docs").rglob("*.rst")]

    mismatch_count = 0
    for target in sorted(targets):
        for lineno, title, title_len, marker_len in process(target):
            print(
                f"{target}:{lineno}: title/underline length mismatch ({verb}): "
                f"title {title_len!r} != marker {marker_len!r} "
                f"({title!r})"
            )
            mismatch_count += 1

    return int(mismatch_count > 0)
125 | 146 |
|
126 | 147 |
|
127 | 148 | if __name__ == "__main__": |
128 | | - main(sys.argv) |
| 149 | + raise SystemExit(main()) |
0 commit comments