From eed2524640f0f616257662eb7588d92ee3bb64ce Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+aa-turner@users.noreply.github.com> Date: Mon, 10 Feb 2025 19:04:19 +0000 Subject: [PATCH 01/11] Remove ``productionlist`` hard-coding in translators --- doc/usage/restructuredtext/directives.rst | 18 +-- sphinx/domains/std/__init__.py | 110 +++++++++++++----- sphinx/writers/html5.py | 20 +--- sphinx/writers/latex.py | 17 ++- sphinx/writers/manpage.py | 21 +--- sphinx/writers/texinfo.py | 24 ++-- sphinx/writers/text.py | 30 +++-- .../test_directive_productionlist.py | 20 ++-- 8 files changed, 146 insertions(+), 114 deletions(-) diff --git a/doc/usage/restructuredtext/directives.rst b/doc/usage/restructuredtext/directives.rst index ee085788e1d..5d7d241d600 100644 --- a/doc/usage/restructuredtext/directives.rst +++ b/doc/usage/restructuredtext/directives.rst @@ -1647,7 +1647,7 @@ derived forms), but provides enough to allow context-free grammars to be displayed in a way that causes uses of a symbol to be rendered as hyperlinks to the definition of the symbol. There is this directive: -.. rst:directive:: .. productionlist:: [productionGroup] +.. rst:directive:: .. productionlist:: [production_group] This directive is used to enclose a group of productions. Each production is given on a single line and consists of a name, separated by a colon from @@ -1662,26 +1662,26 @@ the definition of the symbol. There is this directive: production list, you can reference to token productions using :rst:role:`token`. - The *productionGroup* argument to :rst:dir:`productionlist` serves to + The *production_group* argument to :rst:dir:`productionlist` serves to distinguish different sets of production lists that belong to different - grammars. Multiple production lists with the same *productionGroup* thus + grammars. Multiple production lists with the same *production_group* thus define rules in the same scope. 
Inside of the production list, tokens implicitly refer to productions from the current group. You can refer to the production of another grammar by prefixing the token with its group name and a colon, e.g, - "``otherGroup:sum``". If the group of the token should not be shown in + "``other-group:sum``". If the group of the token should not be shown in the production, it can be prefixed by a tilde, e.g., - "``~otherGroup:sum``". To refer to a production from an unnamed + "``~other-group:sum``". To refer to a production from an unnamed grammar, the token should be prefixed by a colon, e.g., "``:sum``". Outside of the production list, - if you have given a *productionGroup* argument you must prefix the + if you have given a *production_group* argument you must prefix the token name in the cross-reference with the group name and a colon, - e.g., "``myGroup:sum``" instead of just "``sum``". + e.g., "``my_group:sum``" instead of just "``sum``". If the group should not be shown in the title of the link either - an explicit title can be given (e.g., "``myTitle ``"), - or the target can be prefixed with a tilde (e.g., "``~myGroup:sum``"). + an explicit title can be given (e.g., "``myTitle ``"), + or the target can be prefixed with a tilde (e.g., "``~my_group:sum``"). Note that no further reStructuredText parsing is done in the production, so that you don't have to escape ``*`` or ``|`` characters. 
diff --git a/sphinx/domains/std/__init__.py b/sphinx/domains/std/__init__.py index f6f176debd5..bda76de9ba5 100644 --- a/sphinx/domains/std/__init__.py +++ b/sphinx/domains/std/__init__.py @@ -22,7 +22,7 @@ from sphinx.util.parsing import nested_parse_to_nodes if TYPE_CHECKING: - from collections.abc import Callable, Iterable, Iterator, Set + from collections.abc import Callable, Iterable, Iterator, Sequence, Set from typing import Any, ClassVar, Final from docutils.nodes import Element, Node, system_message @@ -597,41 +597,91 @@ class ProductionList(SphinxDirective): option_spec: ClassVar[OptionSpec] = {} def run(self) -> list[Node]: - domain = self.env.domains.standard_domain - node: Element = addnodes.productionlist() + node = addnodes.productionlist() self.set_source_info(node) # The backslash handling is from ObjectDescription.get_signatures nl_escape_re = re.compile(r'\\\n') lines = nl_escape_re.sub('', self.arguments[0]).split('\n') + production_group = self.production_group(lines, self.options) + production_lines = list(self.production_definitions(lines)) + max_len = max(len(name) for _, name, _ in production_lines) + node_location = self.get_location() + node += [ + self.make_production( + rawsource=rule, + name=name, + tokens=tokens, + production_group=production_group, + max_len=max_len, + location=node_location, + ) + for rule, name, tokens in production_lines + ] + return [node] - production_group = '' - first_rule_seen = False - for rule in lines: - if not first_rule_seen and ':' not in rule: - production_group = rule.strip() - continue - first_rule_seen = True - try: - name, tokens = rule.split(':', 1) - except ValueError: + @staticmethod + def production_group(lines: Sequence[str], options: dict[str, Any]) -> str: # NoQA: ARG004 + # get production_group + if not lines or ':' in lines[0]: + return '' + production_group = lines[0].strip() + lines[:] = lines[1:] + return production_group + + @staticmethod + def production_definitions(lines: 
Iterable[str]) -> Iterator[tuple[str, str, str]]: + """Yield triples of rawsource, name, definition.""" + for line in lines: + if ':' not in line: break - subnode = addnodes.production(rule) - name = name.strip() - subnode['tokenname'] = name - if subnode['tokenname']: - prefix = 'grammar-token-%s' % production_group - node_id = make_id(self.env, self.state.document, prefix, name) - subnode['ids'].append(node_id) - self.state.document.note_implicit_target(subnode, subnode) - - if len(production_group) != 0: - obj_name = f'{production_group}:{name}' - else: - obj_name = name - domain.note_object('token', obj_name, node_id, location=node) - subnode.extend(token_xrefs(tokens, production_group=production_group)) - node.append(subnode) - return [node] + name, _, tokens = line.partition(':') + yield line, name.strip(), tokens.strip() + + def make_production( + self, + rawsource: str, + name: str, + tokens: str, + production_group: str, + max_len: int, + location: str, + ) -> addnodes.production: + production_node = addnodes.production(rawsource, tokenname=name) + if name: + production_node += self.make_target(name, production_group, location) + else: + production_node += self.continuation_padding(max_len) + production_node.append(self.production_separator(name, max_len)) + production_node += token_xrefs(tokens, production_group=production_group) + production_node.append(nodes.Text('\n')) + return production_node + + def make_target( + self, + name: str, + production_group: str, + location: str, + ) -> addnodes.literal_strong: + """Make a link target for the given production.""" + name_node = addnodes.literal_strong(name, name) + prefix = f'grammar-token-{production_group}' + node_id = make_id(self.env, self.state.document, prefix, name) + name_node['ids'].append(node_id) + self.state.document.note_implicit_target(name_node, name_node) + obj_name = f'{production_group}:{name}' if production_group else name + std = self.env.domains.standard_domain + 
std.note_object('token', obj_name, node_id, location=location) + return name_node + + @staticmethod + def continuation_padding(max_len: int) -> nodes.Text: + return nodes.Text(' ' * max_len) + + @staticmethod + def production_separator(name: str, max_len: int) -> nodes.Text: + if name: + return nodes.Text(' ::= '.rjust(max_len - len(name) + 5)) + return nodes.Text(' ') class TokenXRefRole(XRefRole): diff --git a/sphinx/writers/html5.py b/sphinx/writers/html5.py index 86ab4f4c21e..683eebbf86b 100644 --- a/sphinx/writers/html5.py +++ b/sphinx/writers/html5.py @@ -5,7 +5,7 @@ import posixpath import re import urllib.parse -from typing import TYPE_CHECKING, cast +from typing import TYPE_CHECKING from docutils import nodes from docutils.writers.html5_polyglot import HTMLTranslator as BaseTranslator @@ -17,8 +17,6 @@ from sphinx.util.images import get_image_size if TYPE_CHECKING: - from collections.abc import Iterable - from docutils.nodes import Element, Node, Text from sphinx.builders import Builder @@ -695,23 +693,9 @@ def depart_literal(self, node: Element) -> None: def visit_productionlist(self, node: Element) -> None: self.body.append(self.starttag(node, 'pre')) - productionlist = cast('Iterable[addnodes.production]', node) - maxlen = max(len(production['tokenname']) for production in productionlist) - lastname = None - for production in productionlist: - if production['tokenname']: - lastname = production['tokenname'].ljust(maxlen) - self.body.append(self.starttag(production, 'strong', '')) - self.body.append(lastname + ' ::= ') - elif lastname is not None: - self.body.append(' ' * (maxlen + 5)) - production.walkabout(self) - self.body.append('\n') - self.body.append('\n') - raise nodes.SkipNode def depart_productionlist(self, node: Element) -> None: - pass + self.body.append('\n') def visit_production(self, node: Element) -> None: pass diff --git a/sphinx/writers/latex.py b/sphinx/writers/latex.py index ee779ea7c31..72c7e9b3bea 100644 --- 
a/sphinx/writers/latex.py +++ b/sphinx/writers/latex.py @@ -323,7 +323,7 @@ def __init__( # flags self.in_title = 0 - self.in_production_list = 0 + self.in_production_list = False self.in_footnote = 0 self.in_caption = 0 self.in_term = 0 @@ -671,20 +671,25 @@ def depart_glossary(self, node: Element) -> None: def visit_productionlist(self, node: Element) -> None: self.body.append(BLANKLINE) self.body.append(r'\begin{productionlist}' + CR) - self.in_production_list = 1 + self.in_production_list = True def depart_productionlist(self, node: Element) -> None: self.body.append(r'\end{productionlist}' + BLANKLINE) - self.in_production_list = 0 + self.in_production_list = False def visit_production(self, node: Element) -> None: if node['tokenname']: tn = node['tokenname'] - self.body.append(self.hypertarget('grammar-token-' + tn)) + self.body.append(self.hypertarget(f'grammar-token-{tn}')) self.body.append(r'\production{%s}{' % self.encode(tn)) else: self.body.append(r'\productioncont{') + # remove name/padding and seperator child nodes, + # these are handled by '\production' and '\productioncont' + # TODO: remove special LaTeX handling of production nodes + node[:] = node[2:] + def depart_production(self, node: Element) -> None: self.body.append('}' + CR) @@ -2070,9 +2075,13 @@ def depart_strong(self, node: Element) -> None: self.body.append('}') def visit_literal_strong(self, node: Element) -> None: + if self.in_production_list: + return self.body.append(r'\sphinxstyleliteralstrong{\sphinxupquote{') def depart_literal_strong(self, node: Element) -> None: + if self.in_production_list: + return self.body.append('}}') def visit_abbreviation(self, node: Element) -> None: diff --git a/sphinx/writers/manpage.py b/sphinx/writers/manpage.py index 3fa4638c954..171761fa2b0 100644 --- a/sphinx/writers/manpage.py +++ b/sphinx/writers/manpage.py @@ -79,8 +79,6 @@ class ManualPageTranslator(SphinxTranslator, BaseTranslator): # type: ignore[mi def __init__(self, document: 
nodes.document, builder: Builder) -> None: super().__init__(document, builder) - self.in_productionlist = 0 - # first title is the manpage title self.section_level = -1 @@ -274,25 +272,10 @@ def depart_seealso(self, node: Element) -> None: def visit_productionlist(self, node: Element) -> None: self.ensure_eol() - self.in_productionlist += 1 self.body.append('.sp\n.nf\n') - productionlist = cast('Iterable[addnodes.production]', node) - maxlen = max(len(production['tokenname']) for production in productionlist) - lastname = None - for production in productionlist: - if production['tokenname']: - lastname = production['tokenname'].ljust(maxlen) - self.body.append(self.defs['strong'][0]) - self.body.append(self.deunicode(lastname)) - self.body.append(self.defs['strong'][1]) - self.body.append(' ::= ') - elif lastname is not None: - self.body.append(' ' * (maxlen + 5)) - production.walkabout(self) - self.body.append('\n') + + def depart_productionlist(self, node: Element) -> None: self.body.append('\n.fi\n') - self.in_productionlist -= 1 - raise nodes.SkipNode def visit_production(self, node: Element) -> None: pass diff --git a/sphinx/writers/texinfo.py b/sphinx/writers/texinfo.py index d3da34f108f..5a7826a5dd3 100644 --- a/sphinx/writers/texinfo.py +++ b/sphinx/writers/texinfo.py @@ -189,6 +189,7 @@ def __init__(self, document: nodes.document, builder: TexinfoBuilder) -> None: self.escape_hyphens = 0 self.curfilestack: list[str] = [] self.footnotestack: list[dict[str, list[collected_footnote | bool]]] = [] + self.in_production_list = False self.in_footnote = 0 self.in_samp = 0 self.handled_abbrs: set[str] = set() @@ -1308,20 +1309,11 @@ def unknown_departure(self, node: Node) -> None: def visit_productionlist(self, node: Element) -> None: self.visit_literal_block(None) - productionlist = cast('Iterable[addnodes.production]', node) - maxlen = max(len(production['tokenname']) for production in productionlist) - - for production in productionlist: - if 
production['tokenname']: - for id in production.get('ids'): - self.add_anchor(id, production) - s = production['tokenname'].ljust(maxlen) + ' ::=' - else: - s = ' ' * (maxlen + 4) - self.body.append(self.escape(s)) - self.body.append(self.escape(production.astext() + '\n')) + self.in_production_list = True + + def depart_productionlist(self, node: Element) -> None: + self.in_production_list = False self.depart_literal_block(None) - raise nodes.SkipNode def visit_production(self, node: Element) -> None: pass @@ -1336,9 +1328,15 @@ def depart_literal_emphasis(self, node: Element) -> None: self.body.append('}') def visit_literal_strong(self, node: Element) -> None: + if self.in_production_list: + for id_ in node['ids']: + self.add_anchor(id_, node) + return self.body.append('@code{') def depart_literal_strong(self, node: Element) -> None: + if self.in_production_list: + return self.body.append('}') def visit_index(self, node: Element) -> None: diff --git a/sphinx/writers/text.py b/sphinx/writers/text.py index d712fd133ed..84ce9ccc7ab 100644 --- a/sphinx/writers/text.py +++ b/sphinx/writers/text.py @@ -408,6 +408,7 @@ def __init__(self, document: nodes.document, builder: TextBuilder) -> None: self.sectionlevel = 0 self.lineblocklevel = 0 self.table: Table + self.in_production_list = False self.context: list[str] = [] """Heterogeneous stack. 
@@ -787,18 +788,17 @@ def depart_caption(self, node: Element) -> None: def visit_productionlist(self, node: Element) -> None: self.new_state() - productionlist = cast('Iterable[addnodes.production]', node) - maxlen = max(len(production['tokenname']) for production in productionlist) - lastname = None - for production in productionlist: - if production['tokenname']: - self.add_text(production['tokenname'].ljust(maxlen) + ' ::=') - lastname = production['tokenname'] - elif lastname is not None: - self.add_text(' ' * (maxlen + 4)) - self.add_text(production.astext() + self.nl) + self.in_production_list = True + + def depart_productionlist(self, node: Element) -> None: + self.in_production_list = False self.end_state(wrap=False) - raise nodes.SkipNode + + def visit_production(self, node: Element) -> None: + pass + + def depart_production(self, node: Element) -> None: + pass def visit_footnote(self, node: Element) -> None: label = cast('nodes.label', node[0]) @@ -1224,9 +1224,13 @@ def depart_strong(self, node: Element) -> None: self.add_text('**') def visit_literal_strong(self, node: Element) -> None: + if self.in_production_list: + return self.add_text('**') def depart_literal_strong(self, node: Element) -> None: + if self.in_production_list: + return self.add_text('**') def visit_abbreviation(self, node: Element) -> None: @@ -1249,9 +1253,13 @@ def depart_title_reference(self, node: Element) -> None: self.add_text('*') def visit_literal(self, node: Element) -> None: + if self.in_production_list: + return self.add_text('"') def depart_literal(self, node: Element) -> None: + if self.in_production_list: + return self.add_text('"') def visit_subscript(self, node: Element) -> None: diff --git a/tests/test_directives/test_directive_productionlist.py b/tests/test_directives/test_directive_productionlist.py index 2026730c64b..b127936ca1b 100644 --- a/tests/test_directives/test_directive_productionlist.py +++ b/tests/test_directives/test_directive_productionlist.py @@ -78,7 
+78,7 @@ def test_productionlist(app: SphinxTestApp) -> None: ] text = (app.outdir / 'LineContinuation.html').read_text(encoding='utf8') - assert 'A ::= B C D E F G' in text + assert 'A ::= B C D E F G' in text @pytest.mark.sphinx('html', testroot='root') @@ -140,14 +140,14 @@ def test_productionlist_continuation_lines( _, _, content = content.partition('
')
     content, _, _ = content.partition('
') expected = """ -assignment_stmt ::= (target_list "=")+ (starred_expression | yield_expression) -target_list ::= target ("," target)* [","] -target ::= identifier - | "(" [target_list] ")" - | "[" [target_list] "]" - | attributeref - | subscription - | slicing - | "*" target +assignment_stmt ::= (target_list "=")+ (starred_expression | yield_expression) +target_list ::= target ("," target)* [","] +target ::= identifier + | "(" [target_list] ")" + | "[" [target_list] "]" + | attributeref + | subscription + | slicing + | "*" target """ assert content == expected From aefc58164c88de5b2730a5c80d9ba19875d02ce5 Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+aa-turner@users.noreply.github.com> Date: Mon, 10 Feb 2025 20:09:55 +0000 Subject: [PATCH 02/11] fixup! Remove ``productionlist`` hard-coding in translators --- CHANGES.rst | 3 +++ sphinx/domains/std/__init__.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index 991c2848483..3288037591c 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -104,6 +104,9 @@ Features added * #9169: Add the :confval:`intersphinx_resolve_self` option to resolve an intersphinx reference to the current project. Patch by Jakob Lykke Andersen and Adam Turner. +* #13326: Remove hardcoding from handling :class:`~sphinx.addnodes.productionlist` + nodes in all writers, to improve flexibility. + Patch by Adam Turner. 
Bugs fixed ---------- diff --git a/sphinx/domains/std/__init__.py b/sphinx/domains/std/__init__.py index bda76de9ba5..b879570cfc4 100644 --- a/sphinx/domains/std/__init__.py +++ b/sphinx/domains/std/__init__.py @@ -620,7 +620,7 @@ def run(self) -> list[Node]: return [node] @staticmethod - def production_group(lines: Sequence[str], options: dict[str, Any]) -> str: # NoQA: ARG004 + def production_group(lines: list[str], options: dict[str, Any]) -> str: # NoQA: ARG004 # get production_group if not lines or ':' in lines[0]: return '' From 12b4700c9404d5d9c346023e60b6ee1cdaf39f08 Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+aa-turner@users.noreply.github.com> Date: Mon, 10 Feb 2025 20:30:43 +0000 Subject: [PATCH 03/11] fixup! Remove ``productionlist`` hard-coding in translators --- sphinx/domains/std/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sphinx/domains/std/__init__.py b/sphinx/domains/std/__init__.py index b879570cfc4..b24af59fd49 100644 --- a/sphinx/domains/std/__init__.py +++ b/sphinx/domains/std/__init__.py @@ -22,7 +22,7 @@ from sphinx.util.parsing import nested_parse_to_nodes if TYPE_CHECKING: - from collections.abc import Callable, Iterable, Iterator, Sequence, Set + from collections.abc import Callable, Iterable, Iterator, Set from typing import Any, ClassVar, Final from docutils.nodes import Element, Node, system_message From e7083a7388eac9d1f27a78c07d9c9e349de1d89a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-Fran=C3=A7ois=20B?= <2589111+jfbu@users.noreply.github.com> Date: Tue, 11 Feb 2025 10:52:02 +0100 Subject: [PATCH 04/11] LaTeX support --- sphinx/texinputs/sphinxlatexobjects.sty | 39 ++++++++++++++++++------- sphinx/writers/latex.py | 16 +++------- 2 files changed, 33 insertions(+), 22 deletions(-) diff --git a/sphinx/texinputs/sphinxlatexobjects.sty b/sphinx/texinputs/sphinxlatexobjects.sty index e960440cafb..1147a016227 100644 --- a/sphinx/texinputs/sphinxlatexobjects.sty +++ 
b/sphinx/texinputs/sphinxlatexobjects.sty @@ -1,7 +1,7 @@ %% MODULE RELEASE DATA AND OBJECT DESCRIPTIONS % % change this info string if making any custom modification -\ProvidesPackage{sphinxlatexobjects}[2023/07/23 documentation environments] +\ProvidesPackage{sphinxlatexobjects}[2025/02/11 documentation environments] % Provides support for this output mark-up from Sphinx latex writer: % @@ -279,18 +279,37 @@ \newcommand{\pysigstopmultiline}{\sphinxsigismultilinefalse\itemsep\sphinxsignaturesep}% % Production lists +% This simply outputs the lines as is, in monospace font. Refers #13326. +% (the left padding for multi-line alignment is from the nodes themselves, +% and latex is configured below to obey such horizontal whitespace). +% +% - The legacy code used longtable and hardcoded the separator as ::= +% via dedicated macros defined by the environment itself. +% - Here the separator is part of the node. Any extra LaTeX mark-up would +% have to originate from the writer itself to decorate it. +% - The legacy code used strangely \parindent and \indent. Possibly +% (unchecked) due to an earlier tabular usage, but a longtable does not +% work in paragraph mode, so \parindent was without effect and +% \indent only caused some extra blank line above display. +% - The table had some whitespace on its left, which we imitate here via +% \parindent usage (which works in our context...). 
% \newenvironment{productionlist}{% -% \def\sphinxoptional##1{{\Large[}##1{\Large]}} - \def\production##1##2{\\\sphinxcode{\sphinxupquote{##1}}&::=&\sphinxcode{\sphinxupquote{##2}}}% - \def\productioncont##1{\\& &\sphinxcode{\sphinxupquote{##1}}}% - \parindent=2em - \indent - \setlength{\LTpre}{0pt}% - \setlength{\LTpost}{0pt}% - \begin{longtable}[l]{lcl} + \bigskip % imitate close enough legacy vertical whitespace, which was + % visibly excessive + \ttfamily % needed for space tokens to have same width as letters + \parindent1em % width of a "quad", font-dependent, usually circa width of 2 + % letters + \obeylines % line in = line out + \parskip\z@skip % prevent the parskip vertical whitespace between lines, + % which are technically to LaTeX now each its own paragraph + \@vobeyspaces % obey whitespace + % now a technicality to, only locally to this environment, prevent the + % suppression of indentation of first line, if it comes right after + % \section. Cf package indentfirst from which the code is borrowed. + \let\@afterindentfalse\@afterindenttrue\@afterindenttrue }{% - \end{longtable} + \par % does not hurt... } % Definition lists; requested by AMK for HOWTO documents. 
Probably useful diff --git a/sphinx/writers/latex.py b/sphinx/writers/latex.py index 72c7e9b3bea..e018ea91e0a 100644 --- a/sphinx/writers/latex.py +++ b/sphinx/writers/latex.py @@ -678,20 +678,12 @@ def depart_productionlist(self, node: Element) -> None: self.in_production_list = False def visit_production(self, node: Element) -> None: - if node['tokenname']: - tn = node['tokenname'] - self.body.append(self.hypertarget(f'grammar-token-{tn}')) - self.body.append(r'\production{%s}{' % self.encode(tn)) - else: - self.body.append(r'\productioncont{') - - # remove name/padding and seperator child nodes, - # these are handled by '\production' and '\productioncont' - # TODO: remove special LaTeX handling of production nodes - node[:] = node[2:] + # Nothing to do, the productionlist LaTeX environment + # is configured to render line per line the nodes + pass def depart_production(self, node: Element) -> None: - self.body.append('}' + CR) + pass def visit_transition(self, node: Element) -> None: self.body.append(self.elements['transition']) From 2137a2872db39a9d784915c20a58662f86dbff62 Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+aa-turner@users.noreply.github.com> Date: Tue, 11 Feb 2025 18:32:33 +0000 Subject: [PATCH 05/11] Rewrite directive documentation --- doc/usage/restructuredtext/directives.rst | 76 ++++++++++++----------- sphinx/writers/latex.py | 4 +- 2 files changed, 41 insertions(+), 39 deletions(-) diff --git a/doc/usage/restructuredtext/directives.rst b/doc/usage/restructuredtext/directives.rst index 5d7d241d600..33269b522a6 100644 --- a/doc/usage/restructuredtext/directives.rst +++ b/doc/usage/restructuredtext/directives.rst @@ -1642,49 +1642,51 @@ Grammar production displays --------------------------- Special markup is available for displaying the productions of a formal grammar. 
-The markup is simple and does not attempt to model all aspects of BNF (or any -derived forms), but provides enough to allow context-free grammars to be -displayed in a way that causes uses of a symbol to be rendered as hyperlinks to -the definition of the symbol. There is this directive: +The markup is simple and does not attempt to model all aspects of BNF_ +(or any derived forms), but provides enough to allow context-free grammars +to be displayed in a way that causes uses of a symbol to be rendered +as hyperlinks to the definition of the symbol. +There is this directive: + +.. _BNF: https://en.wikipedia.org/wiki/Backus%E2%80%93Naur_form .. rst:directive:: .. productionlist:: [production_group] - This directive is used to enclose a group of productions. Each production - is given on a single line and consists of a name, separated by a colon from - the following definition. If the definition spans multiple lines, each - continuation line must begin with a colon placed at the same column as in - the first line. + This directive is used to enclose a group of productions. + Each production is given on a single line and consists of a name, + separated by a colon from the following definition. + If the definition spans multiple lines, each continuation line + must begin with a colon placed at the same column as in the first line. Blank lines are not allowed within ``productionlist`` directive arguments. - The definition can contain token names which are marked as interpreted text - (e.g., "``sum ::= `integer` "+" `integer```") -- this generates - cross-references to the productions of these tokens. Outside of the - production list, you can reference to token productions using - :rst:role:`token`. - - The *production_group* argument to :rst:dir:`productionlist` serves to - distinguish different sets of production lists that belong to different - grammars. Multiple production lists with the same *production_group* thus - define rules in the same scope. 
- - Inside of the production list, tokens implicitly refer to productions - from the current group. You can refer to the production of another - grammar by prefixing the token with its group name and a colon, e.g, - "``other-group:sum``". If the group of the token should not be shown in - the production, it can be prefixed by a tilde, e.g., - "``~other-group:sum``". To refer to a production from an unnamed - grammar, the token should be prefixed by a colon, e.g., "``:sum``". - - Outside of the production list, - if you have given a *production_group* argument you must prefix the - token name in the cross-reference with the group name and a colon, + The optional *production_group* directive argument serves to distinguish + different sets of production lists that belong to different grammars. + Multiple production lists with the same *production_group* + thus define rules in the same scope. + This can also be used to split the description of a long or complex grammar + across multiple ``productionlist`` directives with the same *production_group*. + + The definition can contain token names which are marked as interpreted text, + (e.g. "``sum ::= `integer` "+" `integer```"), + to generate cross-references to the productions of these tokens. + Such cross-references implicitly refer to productions from the current group. + To reference a production from another grammar, the token name + must be prefixed with the group name and a colon, e.g. "``other-group:sum``". + If the group of the token should not be shown in the production, + it can be prefixed by a tilde, e.g., "``~other-group:sum``". + To refer to a production from an unnamed grammar, + the token should be prefixed by a colon, e.g., "``:sum``". + No further reStructuredText parsing is done in the production, + so that special characters (``*``, ``|``, etc) do not need to be escaped. + + Token productions can be cross-referenced outwith the production list + by using the :rst:role:`token` role. 
+ If you have used a *production_group* argument, + the token name must be prefixed with the group name and a colon, e.g., "``my_group:sum``" instead of just "``sum``". - If the group should not be shown in the title of the link either - an explicit title can be given (e.g., "``myTitle ``"), - or the target can be prefixed with a tilde (e.g., "``~my_group:sum``"). - - Note that no further reStructuredText parsing is done in the production, - so that you don't have to escape ``*`` or ``|`` characters. + Standard :ref:`cross-referencing modifiers ` + may be used with the ``:token:`` role, + such as custom link text and suppressing the group name with a tilde (``~``). The following is an example taken from the Python Reference Manual:: diff --git a/sphinx/writers/latex.py b/sphinx/writers/latex.py index e018ea91e0a..378364797d7 100644 --- a/sphinx/writers/latex.py +++ b/sphinx/writers/latex.py @@ -674,12 +674,12 @@ def visit_productionlist(self, node: Element) -> None: self.in_production_list = True def depart_productionlist(self, node: Element) -> None: - self.body.append(r'\end{productionlist}' + BLANKLINE) self.in_production_list = False + self.body.append(r'\end{productionlist}' + BLANKLINE) def visit_production(self, node: Element) -> None: # Nothing to do, the productionlist LaTeX environment - # is configured to render line per line the nodes + # is configured to render the nodes line-by-line pass def depart_production(self, node: Element) -> None: From 852b2abf88b24a254096f6a85267e662247fbcbf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-Fran=C3=A7ois=20B?= <2589111+jfbu@users.noreply.github.com> Date: Tue, 11 Feb 2025 21:03:09 +0100 Subject: [PATCH 06/11] Add targets for hyperlinks to productions --- sphinx/writers/latex.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sphinx/writers/latex.py b/sphinx/writers/latex.py index 378364797d7..c0d763e43a7 100644 --- a/sphinx/writers/latex.py +++ b/sphinx/writers/latex.py @@ -680,6 +680,7 @@ def 
depart_productionlist(self, node: Element) -> None: def visit_production(self, node: Element) -> None: # Nothing to do, the productionlist LaTeX environment # is configured to render the nodes line-by-line + # But see also visit_literal_strong special clause. pass def depart_production(self, node: Element) -> None: @@ -2068,6 +2069,10 @@ def depart_strong(self, node: Element) -> None: def visit_literal_strong(self, node: Element) -> None: if self.in_production_list: + ctx = r'\phantomsection' + for id_ in node['ids']: + ctx += self.hypertarget(id_, anchor=False) + self.body.append(ctx) return self.body.append(r'\sphinxstyleliteralstrong{\sphinxupquote{') From f3045b08ddd2e5a1e6283cd8818b900ccac474ae Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+aa-turner@users.noreply.github.com> Date: Wed, 12 Feb 2025 01:33:12 +0000 Subject: [PATCH 07/11] Prefer str.join to += --- sphinx/writers/latex.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/sphinx/writers/latex.py b/sphinx/writers/latex.py index c0d763e43a7..bfbdb268aaf 100644 --- a/sphinx/writers/latex.py +++ b/sphinx/writers/latex.py @@ -2069,10 +2069,9 @@ def depart_strong(self, node: Element) -> None: def visit_literal_strong(self, node: Element) -> None: if self.in_production_list: - ctx = r'\phantomsection' - for id_ in node['ids']: - ctx += self.hypertarget(id_, anchor=False) - self.body.append(ctx) + ctx = [r'\phantomsection'] + ctx += [self.hypertarget(id_, anchor=False) for id_ in node['ids']] + self.body.append(''.join(ctx)) return self.body.append(r'\sphinxstyleliteralstrong{\sphinxupquote{') From b3b9874ec7c0dcaceb5afc7152958593f33ed23b Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+aa-turner@users.noreply.github.com> Date: Wed, 12 Feb 2025 19:23:56 +0000 Subject: [PATCH 08/11] Simplify --- sphinx/domains/std/__init__.py | 65 ++++++++++++++++++++-------------- 1 file changed, 39 insertions(+), 26 deletions(-) diff --git a/sphinx/domains/std/__init__.py 
b/sphinx/domains/std/__init__.py index b24af59fd49..b25a3cc89ce 100644 --- a/sphinx/domains/std/__init__.py +++ b/sphinx/domains/std/__init__.py @@ -2,6 +2,7 @@ from __future__ import annotations +import operator import re from copy import copy from typing import TYPE_CHECKING, cast @@ -22,7 +23,7 @@ from sphinx.util.parsing import nested_parse_to_nodes if TYPE_CHECKING: - from collections.abc import Callable, Iterable, Iterator, Set + from collections.abc import Callable, Iterable, Iterator, MutableSequence, Set from typing import Any, ClassVar, Final from docutils.nodes import Element, Node, system_message @@ -553,7 +554,7 @@ def run(self) -> list[Node]: return [*messages, node] -def token_xrefs(text: str, production_group: str = '') -> list[Node]: +def token_xrefs(text: str, production_group: str = '') -> Iterable[Node]: if len(production_group) != 0: production_group += ':' retnodes: list[Node] = [] @@ -596,31 +597,42 @@ class ProductionList(SphinxDirective): final_argument_whitespace = True option_spec: ClassVar[OptionSpec] = {} + # The backslash handling is from ObjectDescription.get_signatures + _nl_escape_re: Final = re.compile(r'\\\n') + + # Get 'name' from triples of rawsource, name, definition (tokens) + _name_getter = operator.itemgetter(1) + def run(self) -> list[Node]: - node = addnodes.productionlist() - self.set_source_info(node) - # The backslash handling is from ObjectDescription.get_signatures - nl_escape_re = re.compile(r'\\\n') - lines = nl_escape_re.sub('', self.arguments[0]).split('\n') - production_group = self.production_group(lines, self.options) + lines = self._nl_escape_re.sub('', self.arguments[0]).splitlines() production_lines = list(self.production_definitions(lines)) - max_len = max(len(name) for _, name, _ in production_lines) + + name_getter = self._name_getter + production_group = self.production_group(lines=lines, options=self.options) + max_name_len = max(map(len, map(name_getter, production_lines))) node_location = 
self.get_location() - node += [ + + productions = [ self.make_production( rawsource=rule, name=name, tokens=tokens, production_group=production_group, - max_len=max_len, + max_len=max_name_len, location=node_location, ) for rule, name, tokens in production_lines ] + node = addnodes.productionlist('', *productions) + self.set_source_info(node) return [node] @staticmethod - def production_group(lines: list[str], options: dict[str, Any]) -> str: # NoQA: ARG004 + def production_group( + *, + lines: MutableSequence[str], + options: dict[str, Any], # NoQA: ARG004 + ) -> str: # get production_group if not lines or ':' in lines[0]: return '' @@ -629,8 +641,10 @@ def production_group(lines: list[str], options: dict[str, Any]) -> str: # NoQA: return production_group @staticmethod - def production_definitions(lines: Iterable[str]) -> Iterator[tuple[str, str, str]]: - """Yield triples of rawsource, name, definition.""" + def production_definitions( + lines: Iterable[str], / + ) -> Iterator[tuple[str, str, str]]: + """Yield triples of rawsource, name, definition (tokens).""" for line in lines: if ':' not in line: break @@ -639,6 +653,7 @@ def production_definitions(lines: Iterable[str]) -> Iterator[tuple[str, str, str def make_production( self, + *, rawsource: str, name: str, tokens: str, @@ -648,16 +663,17 @@ def make_production( ) -> addnodes.production: production_node = addnodes.production(rawsource, tokenname=name) if name: - production_node += self.make_target(name, production_group, location) - else: - production_node += self.continuation_padding(max_len) - production_node.append(self.production_separator(name, max_len)) - production_node += token_xrefs(tokens, production_group=production_group) + production_node += self.make_name_target( + name=name, production_group=production_group, location=location + ) + production_node.append(self.separator_node(name=name, max_len=max_len)) + production_node += token_xrefs(text=tokens, production_group=production_group) 
production_node.append(nodes.Text('\n')) return production_node - def make_target( + def make_name_target( self, + *, name: str, production_group: str, location: str, @@ -674,14 +690,11 @@ def make_target( return name_node @staticmethod - def continuation_padding(max_len: int) -> nodes.Text: - return nodes.Text(' ' * max_len) - - @staticmethod - def production_separator(name: str, max_len: int) -> nodes.Text: + def separator_node(*, name: str, max_len: int) -> nodes.Text: + """Return separator between 'name' and 'tokens'.""" if name: return nodes.Text(' ::= '.rjust(max_len - len(name) + 5)) - return nodes.Text(' ') + return nodes.Text(' ' * (max_len + 5)) class TokenXRefRole(XRefRole): From 233fc9297012456da04e8227564eb75ead34a757 Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+aa-turner@users.noreply.github.com> Date: Wed, 12 Feb 2025 19:33:20 +0000 Subject: [PATCH 09/11] Work around max-empty-iterable error --- sphinx/domains/std/__init__.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/sphinx/domains/std/__init__.py b/sphinx/domains/std/__init__.py index b25a3cc89ce..1845fed2da0 100644 --- a/sphinx/domains/std/__init__.py +++ b/sphinx/domains/std/__init__.py @@ -2,7 +2,6 @@ from __future__ import annotations -import operator import re from copy import copy from typing import TYPE_CHECKING, cast @@ -600,16 +599,12 @@ class ProductionList(SphinxDirective): # The backslash handling is from ObjectDescription.get_signatures _nl_escape_re: Final = re.compile(r'\\\n') - # Get 'name' from triples of rawsource, name, definition (tokens) - _name_getter = operator.itemgetter(1) - def run(self) -> list[Node]: lines = self._nl_escape_re.sub('', self.arguments[0]).splitlines() production_lines = list(self.production_definitions(lines)) - name_getter = self._name_getter production_group = self.production_group(lines=lines, options=self.options) - max_name_len = max(map(len, map(name_getter, production_lines))) + max_name_len = max(len(name) for _, 
name, _ in production_lines) node_location = self.get_location() productions = [ From a1747b0f08bb2299e369fbc76befbf4ff65da12f Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+aa-turner@users.noreply.github.com> Date: Wed, 12 Feb 2025 19:42:58 +0000 Subject: [PATCH 10/11] Revert "Work around max-empty-iterable error" This reverts commit 233fc9297012456da04e8227564eb75ead34a757. --- sphinx/domains/std/__init__.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sphinx/domains/std/__init__.py b/sphinx/domains/std/__init__.py index 1845fed2da0..b25a3cc89ce 100644 --- a/sphinx/domains/std/__init__.py +++ b/sphinx/domains/std/__init__.py @@ -2,6 +2,7 @@ from __future__ import annotations +import operator import re from copy import copy from typing import TYPE_CHECKING, cast @@ -599,12 +600,16 @@ class ProductionList(SphinxDirective): # The backslash handling is from ObjectDescription.get_signatures _nl_escape_re: Final = re.compile(r'\\\n') + # Get 'name' from triples of rawsource, name, definition (tokens) + _name_getter = operator.itemgetter(1) + def run(self) -> list[Node]: lines = self._nl_escape_re.sub('', self.arguments[0]).splitlines() production_lines = list(self.production_definitions(lines)) + name_getter = self._name_getter production_group = self.production_group(lines=lines, options=self.options) - max_name_len = max(len(name) for _, name, _ in production_lines) + max_name_len = max(map(len, map(name_getter, production_lines))) node_location = self.get_location() productions = [ From 7b2d33ff8f06c125a0384320bab715bd8d9c4747 Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+aa-turner@users.noreply.github.com> Date: Wed, 12 Feb 2025 19:51:30 +0000 Subject: [PATCH 11/11] Fix order --- sphinx/domains/std/__init__.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sphinx/domains/std/__init__.py b/sphinx/domains/std/__init__.py index b25a3cc89ce..7909138ace9 100644 --- a/sphinx/domains/std/__init__.py +++ 
b/sphinx/domains/std/__init__.py @@ -604,11 +604,13 @@ class ProductionList(SphinxDirective): _name_getter = operator.itemgetter(1) def run(self) -> list[Node]: + name_getter = self._name_getter lines = self._nl_escape_re.sub('', self.arguments[0]).splitlines() - production_lines = list(self.production_definitions(lines)) - name_getter = self._name_getter + # Extract production_group argument. + # Must be before extracting production definition triples. production_group = self.production_group(lines=lines, options=self.options) + production_lines = list(self.production_definitions(lines)) max_name_len = max(map(len, map(name_getter, production_lines))) node_location = self.get_location()