Skip to content

Commit c34a888

Browse files
committed
DOCSP-4304: Validate rst structure
1 parent 05c8cdb commit c34a888

File tree

7 files changed

+684
-101
lines changed

7 files changed

+684
-101
lines changed

snooty/rstparser.py

Lines changed: 70 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import pkg_resources
2+
import re
13
import docutils.frontend
24
import docutils.nodes
35
import docutils.parsers.rst
@@ -6,16 +8,16 @@
68
import docutils.parsers.rst.states
79
import docutils.statemachine
810
import docutils.utils
9-
import re
1011
from dataclasses import dataclass
1112
from pathlib import Path, PurePath
12-
from typing import Any, Callable, Dict, Generic, Optional, List, Tuple, \
13-
Type, TypeVar, Iterable, Sequence
13+
from typing import Any, Dict, Generic, Optional, List, Tuple, \
14+
Type, TypeVar, Iterable
1415
from typing_extensions import Protocol
1516
from .gizaparser.parse import load_yaml
1617
from .gizaparser import nodes
1718
from .types import Diagnostic, ProjectConfig
1819
from .flutter import checked, check_type, LoadError
20+
from . import specparser
1921

2022
PAT_EXPLICIT_TILE = re.compile(r'^(?P<label>.+?)\s*(?<!\x00)<(?P<target>.*?)>$', re.DOTALL)
2123
PAT_WHITESPACE = re.compile(r'^\x20*')
@@ -74,70 +76,6 @@ def __init__(self, name: str, rawtext: str, text: str, lineno: int) -> None:
7476
self['target'] = text
7577

7678

77-
def parse_directive_arguments(self: docutils.parsers.rst.states.Body,
                              directive: docutils.parsers.rst.Directive,
                              arg_block: Iterable[str]) -> Sequence[str]:
    """Split a directive's argument block into its individual arguments.

    The lines of ``arg_block`` are joined with newlines and split on
    whitespace. If that yields more words than the directive accepts and
    the directive sets ``final_argument_whitespace``, the joined text is
    split again on single spaces, with the number of splits capped so the
    overflow stays in the last argument.

    Raises ``MarkupError`` when fewer than ``required_arguments`` words are
    supplied, or when too many are supplied and the directive does not
    permit whitespace in its final argument.
    """
    required = directive.required_arguments
    optional = directive.optional_arguments
    limit = required + optional

    joined = '\n'.join(arg_block)
    words = joined.split()
    supplied = len(words)

    if supplied < required:
        raise docutils.parsers.rst.states.MarkupError(
            '{} argument(s) required, {} supplied'.format(required, supplied))

    # Within bounds: the plain whitespace split is the answer.
    if supplied <= limit:
        return words

    if not directive.final_argument_whitespace:
        raise docutils.parsers.rst.states.MarkupError(
            'maximum %s argument(s) allowed, %s supplied'
            % (limit, supplied))

    # Too many words, but the final argument may contain whitespace.
    return joined.split(' ', limit - 1)


# Install this implementation in place of the docutils Body method.
docutils.parsers.rst.states.Body.parse_directive_arguments = (  # type: ignore
    parse_directive_arguments)
99-
100-
101-
def parse_options(block_text: str) -> Dict[str, str]:
    """Extract ``:name: value`` options from the raw text of a directive block.

    Docutils doesn't parse directive options that aren't known ahead of
    time, so this scans the block by hand as a best-effort heuristic.

    The first line of ``block_text`` is never examined and blank lines are
    ignored. A line whose stripped text matches docutils' ``field_marker``
    pattern starts a new option. Any other line either ends the scan (when
    it sits at the base indentation) or is appended, newline-separated and
    with its leading spaces removed, to the most recent option.

    Returns a mapping from option name (colons and surrounding whitespace
    stripped) to its raw value text.
    """
    found: Dict[str, str] = {}
    active_key: Optional[str] = None
    # Stays 0 until a line with non-zero leading spaces has been examined.
    base_indent = 0

    for raw_line in block_text.split('\n')[1:]:
        text = raw_line.strip()
        if not text:
            continue

        leading = PAT_WHITESPACE.match(raw_line)
        assert leading is not None
        indent = len(leading.group(0))
        if base_indent == 0:
            base_indent = indent

        marker = re.match(docutils.parsers.rst.states.Body.patterns['field_marker'], text)
        if marker:
            prefix = marker.group(0)
            assert prefix is not None
            # Everything after the ":name:" marker is the option's initial value.
            active_key = prefix.strip().strip(':')
            found[active_key] = text[len(prefix):]
            continue

        # A non-option line at the base indentation ends the option list.
        if indent == base_indent:
            break

        # Otherwise treat it as a continuation of the current option's value.
        if active_key:
            found[active_key] += '\n' + raw_line[indent:]

    return found
139-
140-
14179
def parse_linenos(term: str, max_val: int) -> List[Tuple[int, int]]:
14280
"""Parse a comma-delimited list of line numbers and ranges."""
14381
results: List[Tuple[int, int]] = []
@@ -154,30 +92,27 @@ def parse_linenos(term: str, max_val: int) -> List[Tuple[int, int]]:
15492
return results
15593

15694

157-
class Directive(docutils.parsers.rst.Directive):
158-
optional_arguments = 1
159-
final_argument_whitespace = True
160-
has_content = True
95+
class BaseDocutilsDirective(docutils.parsers.rst.Directive):
96+
required_arguments = 0
16197

16298
def run(self) -> List[docutils.nodes.Node]:
16399
source, line = self.state_machine.get_source_and_line(self.lineno)
164100
node = directive(self.name)
165101
node.document = self.state.document
166102
node.source, node.line = source, line
103+
node['options'] = self.options
167104
self.add_name(node)
168105

169-
# Parse options
170-
options = parse_options(self.block_text)
171-
node['options'] = options
172-
173106
# Parse the directive's argument. An argument spans from the 0th line to the first
174107
# non-option line; this is a heuristic that is not part of docutils, since docutils
175108
# requires each directive to define its syntax.
176109
if self.arguments and not self.arguments[0].startswith(':'):
177110
arg_lines = self.arguments[0].split('\n')
178111
argument_text = arg_lines[0]
179112
textnodes, messages = self.state.inline_text(argument_text, self.lineno)
180-
if len(arg_lines) > 1 and not options and PAT_BLOCK_HAS_ARGUMENT.match(self.block_text):
113+
if len(arg_lines) > 1 and \
114+
not self.options and \
115+
PAT_BLOCK_HAS_ARGUMENT.match(self.block_text):
181116
node.extend(textnodes)
182117
else:
183118
argument = directive_argument(argument_text, '', *textnodes)
@@ -215,7 +150,11 @@ def prepare_viewlist(text: str, ignore: int = 1) -> List[str]:
215150
return lines
216151

217152

218-
class TabsDirective(Directive):
153+
class TabsDirective(BaseDocutilsDirective):
154+
required_arguments = 0
155+
optional_arguments = 1
156+
final_argument_whitespace = True
157+
has_content = True
219158
option_spec = {
220159
'tabset': str,
221160
'hidden': option_bool
@@ -256,7 +195,7 @@ def run(self) -> List[docutils.nodes.Node]:
256195
return [node]
257196

258197
# The new syntax needs no special handling
259-
return Directive.run(self)
198+
return super().run()
260199

261200
def make_tab_node(self, source: str, child: LegacyTabDefinition) -> docutils.nodes.Node:
262201
line = self.lineno + child.line
@@ -284,9 +223,11 @@ def make_tab_node(self, source: str, child: LegacyTabDefinition) -> docutils.nod
284223
return node
285224

286225

287-
class CodeDirective(Directive):
226+
class CodeDirective(docutils.parsers.rst.Directive):
288227
required_arguments = 1
289228
optional_arguments = 0
229+
has_content = True
230+
final_argument_whitespace = True
290231
option_spec = {
291232
'copyable': option_bool,
292233
'emphasize-lines': str
@@ -324,26 +265,6 @@ def handle_role(typ: str, rawtext: str, text: str,
324265
return [node], []
325266

326267

327-
def lookup_directive(directive_name: str, language_module: object,
                     document: docutils.nodes.document) -> Tuple[Type[Any], List[object]]:
    """Choose the directive class that handles ``directive_name``.

    Names beginning with ``tabs`` map to ``TabsDirective``; ``code-block``
    and ``sourcecode`` map to ``CodeDirective``; every other name maps to
    the generic ``Directive``. ``language_module`` and ``document`` are
    accepted but not used.

    Returns the chosen class paired with an empty list.
    """
    handler: Type[Any] = Directive
    if directive_name.startswith('tabs'):
        handler = TabsDirective
    elif directive_name in ('code-block', 'sourcecode'):
        handler = CodeDirective

    return handler, []
336-
337-
338-
def lookup_role(role_name: str, language_module: object, lineno: int,
                reporter: object) -> Tuple[Optional[Callable[..., Any]], List[object]]:
    """Resolve any role name to the generic ``handle_role`` handler.

    None of the arguments are inspected: every role gets the same handler.
    Returns that handler paired with an empty list.
    """
    nothing: List[object] = []
    return handle_role, nothing


# Replace the docutils lookup functions with lookup_directive and lookup_role.
docutils.parsers.rst.roles.role = lookup_role
docutils.parsers.rst.directives.directive = lookup_directive
345-
346-
347268
class NoTransformRstParser(docutils.parsers.rst.Parser):
348269
def get_transforms(self) -> List[object]:
349270
return []
@@ -365,13 +286,63 @@ def add_diagnostics(self, diagnostics: Iterable[Diagnostic]) -> None: ...
365286
_V = TypeVar('_V', bound=Visitor)
366287

367288

289+
def register_spec_with_docutils(spec: specparser.Spec) -> None:
    """Register all of the definitions in the spec with docutils.

    Directives come from ``spec.directive`` plus one generated per entry in
    ``spec.rstobject``; roles come from ``spec.role`` plus one generated per
    rstobject. Each ordinary directive gets its own ``BaseDocutilsDirective``
    subclass configured from its spec. Every role is registered against
    ``handle_role``.
    """
    register_directive = docutils.parsers.rst.directives.register_directive

    directive_specs = list(spec.directive.items())
    role_specs = list(spec.role.items())

    # Each rstobject contributes a directive and a role under its own name.
    for object_name, rst_object in spec.rstobject.items():
        generated_directive = rst_object.create_directive()
        generated_role = rst_object.create_role()
        directive_specs.append((object_name, generated_directive))
        role_specs.append((object_name, generated_role))

    for directive_name, directive_spec in directive_specs:
        # Skip abstract base directives
        if directive_name.startswith('_'):
            continue

        # Tabs have special handling because of the need to support legacy syntax
        is_tabs = directive_name == 'tabs' or directive_name.startswith('tabs-')
        if is_tabs:
            register_directive(directive_name, TabsDirective)
            continue

        # Map each declared option to the validator the spec supplies for it.
        validators: Dict[str, object] = {}
        for option_name, option in directive_spec.options.items():
            validators[option_name] = spec.get_validator(option)

        class DocutilsDirective(BaseDocutilsDirective):
            has_content = bool(directive_spec.content_type)
            optional_arguments = 1 if directive_spec.argument_type else 0
            final_argument_whitespace = True
            option_spec = validators

        # Rename the class after its directive, e.g. "foo-bar" -> "FooBarDirective".
        title_chars = [ch for ch in directive_name.title() if ch.isalnum() or ch == '_']
        class_name = ''.join(title_chars) + 'Directive'
        DocutilsDirective.__name__ = DocutilsDirective.__qualname__ = class_name
        register_directive(directive_name, DocutilsDirective)

    # Code blocks currently have special handling
    for code_directive_name in ('code-block', 'sourcecode'):
        register_directive(code_directive_name, CodeDirective)

    for role_name, _ in role_specs:
        docutils.parsers.rst.roles.register_local_role(role_name, handle_role)
331+
332+
368333
class Parser(Generic[_V]):
369334
__slots__ = ('project_config', 'visitor_class')
335+
spec: Optional[specparser.Spec] = None
370336

371337
def __init__(self, project_config: ProjectConfig, visitor_class: Type[_V]) -> None:
372338
self.project_config = project_config
373339
self.visitor_class = visitor_class
374340

341+
if not self.spec:
342+
spec = Parser.spec = specparser.Spec.loads(
343+
str(pkg_resources.resource_string(__name__, 'rstspec.toml'), 'utf-8'))
344+
register_spec_with_docutils(spec)
345+
375346
def parse(self, path: Path, text: Optional[str]) -> Tuple[_V, str]:
376347
diagnostics: List[Diagnostic] = []
377348
if text is None:

0 commit comments

Comments
 (0)