|
| 1 | +"""Sanity test that lints YAML code blocks in RST files using docutils and yamllint.""" |
| 2 | +from __future__ import annotations |
| 3 | + |
| 4 | +import io |
| 5 | +import os |
| 6 | +import re |
| 7 | +import sys |
| 8 | +import tempfile |
| 9 | +import traceback |
| 10 | + |
| 11 | +from yamllint import linter |
| 12 | +from yamllint.config import YamlLintConfig |
| 13 | +# from yamllint.linter import PROBLEM_LEVELS |
| 14 | + |
| 15 | +from docutils import nodes |
| 16 | +from docutils.core import Publisher |
| 17 | +from docutils.frontend import Values |
| 18 | +from docutils.io import StringInput |
| 19 | +from docutils.parsers.rst import Directive, Parser as RstParser |
| 20 | +from docutils.parsers.rst.directives import register_directive, unchanged as directive_param_unchanged |
| 21 | +from docutils.utils import new_reporter, Reporter, SystemMessage |
| 22 | + |
| 23 | +import sphinx.application |
| 24 | +from sphinx.application import Sphinx |
| 25 | + |
| 26 | + |
# yamllint configuration, passed to YamlLintConfig every time a YAML code
# block is linted. Rules that carry `level: warning` yield problems of level
# 'warning', which are only reported if that level is listed in REPORT_LEVELS.
YAMLLINT_CONFIG = r"""
extends: default

rules:
  line-length:
    max: 160
    level: warning
  document-start:
    # present: true
    level: warning
  document-end:
    # present: false
    level: warning
  truthy:
    level: warning
    allowed-values:
      - 'true'
      - 'false'
  indentation:
    level: warning
    spaces: 2
    indent-sequences: consistent
  key-duplicates:
    level: warning
    forbid-duplicated-merge-keys: true
  trailing-spaces: enable
  hyphens:
    max-spaces-after: 1
    level: warning
  empty-lines:
    max: 2
    max-start: 0
    max-end: 0
    level: warning
  commas:
    max-spaces-before: 0
    min-spaces-after: 1
    max-spaces-after: 1
    level: warning
  colons:
    max-spaces-before: 0
    max-spaces-after: 1
    level: warning
  brackets:
    min-spaces-inside: 0
    max-spaces-inside: 0
    level: warning
  braces:
    min-spaces-inside: 0
    max-spaces-inside: 1
    level: warning
  octal-values:
    forbid-implicit-octal: true
    forbid-explicit-octal: true
    level: warning
  comments:
    min-spaces-from-content: 1
    level: warning
"""
| 86 | + |
# yamllint problem levels that are turned into results. Problems whose level
# is not in this set are skipped when a code block is linted.
REPORT_LEVELS = {
    # 'warning',  # TODO: enable later
    'error',
}
| 91 | + |
| 92 | + |
class IgnoreDirective(Directive):
    """Directive that accepts a content body and contributes nothing to the document."""

    # The directive may be followed by an indented content block.
    has_content = True

    def run(self) -> list:
        """Return an empty node list, so the directive's content never reaches the tree."""
        return []
| 98 | + |
| 99 | + |
class CodeBlockDirective(Directive):
    """Code block directive that tags the resulting literal block for later linting.

    The produced ``literal_block`` node carries the ``code-block`` class plus
    three extra attributes: the language argument (or ``None``), a marker that
    the node was created by this directive, and the directive's line number.
    """

    has_content = True
    optional_arguments = 1

    # Every option Sphinx allows for code blocks is declared here; docutils
    # needs them to successfully parse directives that use them. The values
    # are passed through unchanged.
    option_spec = dict.fromkeys(
        (
            'caption',
            'class',
            'dedent',
            'emphasize-lines',
            'name',
            'force',
            'linenos',
            'lineno-start',
        ),
        directive_param_unchanged,
    )

    def run(self) -> list[nodes.literal_block]:
        """Build the annotated literal block for this directive's content."""
        source = "\n".join(self.content)

        language = None
        if self.arguments:
            language = self.arguments[0]

        block = nodes.literal_block(source, source)
        block["classes"].append("code-block")
        block["ansible-code-language"] = language
        block["ansible-code-block"] = True
        block["ansible-code-lineno"] = self.lineno
        return [block]
| 125 | + |
| 126 | + |
class YamlLintVisitor(nodes.SparseNodeVisitor):
    """Node visitor that runs yamllint on the YAML code blocks of one document.

    Findings are appended to the ``results`` list handed to the constructor,
    each as a dict with the keys ``path``, ``line``, ``col`` and ``message``.
    """

    # Code block languages whose content is linted with yamllint.
    _YAML_LANGUAGES = frozenset({'YAML', 'yaml', 'yaml+jinja', 'YAML+Jinja'})

    # Code block languages that are accepted without being linted.
    _OTHER_LANGUAGES = frozenset({
        'bash', 'ini', 'console', 'text', 'shell', 'shell-session', 'jinja',
        'ansible-output', 'none', 'json', 'python', 'Jinja', 'diff',
        'powershell', 'md', 'reStructuredText', 'rst', 'sh', 'Python', 'csharp',
    })

    def __init__(self, document: nodes.document, path: str, results: list[dict], content: str):
        """Set up the visitor.

        :param document: The parsed document that will be walked.
        :param path: Path of the file being checked; copied into every result.
        :param results: List that findings are appended to.
        :param content: Full text of the file, used to locate code blocks.
        """
        super().__init__(document)
        self.__path = path
        self.__results = results
        self.__content_lines = content.splitlines()

    def __add_result(self, line, col: int, message: str) -> None:
        """Append one finding for the current file to the shared results list."""
        self.__results.append({
            'path': self.__path,
            'line': line,
            'col': col,
            'message': message,
        })

    def __find_code_offsets(self, node: nodes.literal_block, lineno: int) -> tuple[int, int]:
        """Find the row and column offset of the block's code inside the file.

        :param node: The literal block created by the code block directive.
        :param lineno: Line number recorded by the directive. The file's lines
            are scanned starting at this list index (presumably the line that
            follows the directive, if the recorded number is 1-based -- TODO
            confirm).
        :return: ``(row_offset, col_offset)``; callers add a line/column to
            these to get a position in the file.
        """
        row_offset = lineno
        found_empty_line = False
        found_content_lines = False
        content_lines = node.rawsource.count('\n') + 1
        min_indent = None
        for offset, line in enumerate(self.__content_lines[lineno:]):
            stripped_line = line.strip()
            if not stripped_line:
                # The first blank line moves the start to the line after it.
                if not found_empty_line:
                    row_offset = lineno + offset + 1
                    found_empty_line = True
            elif not found_content_lines:
                # The first non-blank line is taken as the start of the code.
                # NOTE(review): a directive option line directly below the
                # directive is non-blank too -- confirm options are handled.
                found_content_lines = True
                row_offset = lineno + offset

            if found_content_lines and content_lines > 0:
                # Track the smallest indentation over as many file lines as
                # the block's raw source has.
                if stripped_line:
                    indent = len(line) - len(line.lstrip())
                    if min_indent is None or min_indent > indent:
                        min_indent = indent
                content_lines -= 1
            elif not content_lines:
                break

        # Smallest indentation of the non-blank lines in the raw source itself.
        min_source_indent = min(
            (len(line) - len(line.lstrip()) for line in node.rawsource.split('\n') if line.lstrip()),
            default=None,
        )

        # The difference is how far the code is shifted right in the file.
        col_offset = max(0, (min_indent or 0) - (min_source_indent or 0))
        return row_offset, col_offset

    def visit_system_message(self, node: nodes.system_message) -> None:
        """Skip system messages and everything below them."""
        raise nodes.SkipNode

    def visit_error(self, node: nodes.error) -> None:
        """Skip error nodes and everything below them."""
        raise nodes.SkipNode

    def visit_literal_block(self, node: nodes.literal_block) -> None:
        """Check one literal block and record findings.

        Blocks not created by the code block directive are skipped, with a
        finding if they carry any classes. Blocks without a language, or with
        a language outside the known sets, get a finding. YAML blocks are
        linted with yamllint; problems whose level is in ``REPORT_LEVELS`` are
        recorded at their position in the file.

        :raises nodes.SkipNode: In every case except a block without language.
        """
        if "ansible-code-block" not in node.attributes:
            if node.attributes["classes"]:
                self.__add_result(
                    node.line or 'unknown',
                    0,
                    "Warning: found unknown literal block! Could be due to '::'."
                    " If not, please report this, this is likely a bug in the checker"
                    " (could be an unsupported Sphinx directive)."
                    f" Node: {node!r}; attributes: {node.attributes}; content: {node.rawsource!r}",
                )
            raise nodes.SkipNode

        language = node.attributes["ansible-code-language"]
        lineno = node.attributes["ansible-code-lineno"]

        # Ok, we have to find both the row and the column offset for the actual code content
        row_offset, col_offset = self.__find_code_offsets(node, lineno)

        # Now that we have the offsets, we can actually do some processing...
        if language not in self._YAML_LANGUAGES:
            if language is None:
                self.__add_result(row_offset + 1, col_offset + 1, "Literal block without language!")
                return
            if language not in self._OTHER_LANGUAGES:
                self.__add_result(
                    row_offset + 1,
                    col_offset + 1,
                    f"Warning: literal block with disallowed language: {language}."
                    " If the language should be allowed, the checker needs to be updated.",
                )
            raise nodes.SkipNode

        # So we have YAML. Let's lint it!
        try:
            conf = YamlLintConfig(YAMLLINT_CONFIG)
            problems = linter.run(io.StringIO(node.rawsource.rstrip() + '\n'), conf, self.__path)
            for problem in problems:
                if problem.level not in REPORT_LEVELS:
                    continue
                msg = f"{problem.level}: {problem.desc}"
                if problem.rule:
                    msg += f" ({problem.rule})"
                self.__add_result(row_offset + problem.line, col_offset + problem.column, msg)
        except Exception as exc:
            # Report linter failures as a finding instead of aborting the run.
            # The message is flattened outside the f-string: a backslash (and
            # reused quotes) inside a replacement field is a SyntaxError
            # before Python 3.12.
            error = str(exc).replace("\n", " / ")
            self.__add_result(
                row_offset + 1,
                col_offset + 1,
                f"Internal error while linting YAML: exception {type(exc)}: {error};"
                f" traceback: {traceback.format_exc()!r}",
            )

        raise nodes.SkipNode
| 233 | + |
| 234 | + |
def main() -> None:
    """Lint the YAML code blocks of the given RST files and print the findings.

    Paths are taken from the command line or, if none are given, read from
    standard input, one per line. Every file is parsed with docutils and
    walked with ``YamlLintVisitor``. Findings are printed sorted, one per
    line, as ``path:line:col: message``.
    """
    paths = sys.argv[1:] or sys.stdin.read().splitlines()
    results = []

    for directive in (
        'code',
        'code-block',
        'sourcecode',
    ):
        register_directive(directive, CodeBlockDirective)

    # The following docutils directives should better be ignored:
    for directive in (
        'parsed-literal',
    ):
        register_directive(directive, IgnoreDirective)

    # TODO: should we handle the 'literalinclude' directive? maybe check file directly if right extension?
    # (https://www.sphinx-doc.org/en/master/usage/restructuredtext/directives.html#directive-literalinclude)

    docs_root = os.path.normpath(os.path.join(os.path.dirname(__file__), '..', '..', 'docs', 'docsite', 'rst'))

    for path in paths:
        with open(path, 'rt', encoding='utf-8') as f:
            content = f.read()

        # We create a Publisher only to have a mechanism which gives us the settings object.
        # Doing this more explicit is a bad idea since the classes used are deprecated and will
        # eventually get replaced. Publisher.get_settings() looks like a stable enough API that
        # we can 'just use'.
        publisher = Publisher(source_class=StringInput)
        publisher.set_components('standalone', 'restructuredtext', 'pseudoxml')
        override = {
            "root_prefix": docs_root,
            "input_encoding": "utf-8",
            "file_insertion_enabled": False,
            "raw_enabled": False,
            "_disable_config": True,
            "report_level": Reporter.ERROR_LEVEL,
            "warning_stream": io.StringIO(),
        }
        publisher.process_programmatic_settings(None, override, None)
        publisher.set_source(content, path)

        # Parse the document. Exception text is flattened to one line before
        # it is put into an f-string: a backslash inside a replacement field
        # is a SyntaxError before Python 3.12.
        try:
            doc = publisher.reader.read(publisher.source, publisher.parser, publisher.settings)
        except SystemMessage as exc:
            error = str(exc).replace("\n", " / ")
            results.append({
                'path': path,
                'line': 0,
                'col': 0,
                'message': f'Cannot parse document: {error}',
            })
            continue
        except Exception as exc:
            error = str(exc).replace("\n", " / ")
            results.append({
                'path': path,
                'line': 0,
                'col': 0,
                'message': (
                    f'Cannot parse document, unexpected error {type(exc)}: {error};'
                    f' traceback: {traceback.format_exc()!r}'
                ),
            })
            continue

        # Process the document
        try:
            visitor = YamlLintVisitor(doc, path, results, content)
            doc.walk(visitor)
        except Exception as exc:
            error = str(exc).replace("\n", " / ")
            results.append({
                'path': path,
                'line': 0,
                'col': 0,
                'message': f'Cannot process document: {type(exc)} {error}; traceback: {traceback.format_exc()!r}',
            })

    def sort_key(result: dict) -> tuple:
        # 'line' is normally an int, but the visitor uses the string 'unknown'
        # when a node has no line. Comparing str with int raises TypeError, so
        # non-integer lines are ordered as line 0.
        line = result['line']
        return (result['path'], line if isinstance(line, int) else 0, result['col'], result['message'])

    for result in sorted(results, key=sort_key):
        print('{path}:{line}:{col}: {message}'.format(**result))
| 315 | + |
# Run the checker only when this file is executed as a script.
if __name__ == '__main__':
    main()
0 commit comments