sphinx-doc · AA-Turner · Feb 13, 2025 · Feb 10, 2025 · Feb 10, 2025 · Feb 10, 2025
diff --git a/CHANGES.rst b/CHANGES.rst
@@ -108,6 +108,9 @@ Features added
   Patch by Jakob Lykke Andersen and Adam Turner.
 * #11280: Add ability to skip a particular section using the ``no-search`` class.
   Patch by Will Lachance.
+* #13326: Remove hardcoding from handling :class:`~sphinx.addnodes.productionlist`
+  nodes in all writers, to improve flexibility.
+  Patch by Adam Turner.
 
 Bugs fixed
 ----------

diff --git a/doc/usage/restructuredtext/directives.rst b/doc/usage/restructuredtext/directives.rst
@@ -1642,49 +1642,51 @@ Grammar production displays
 ---------------------------
 
 Special markup is available for displaying the productions of a formal grammar.
-The markup is simple and does not attempt to model all aspects of BNF (or any
-derived forms), but provides enough to allow context-free grammars to be
-displayed in a way that causes uses of a symbol to be rendered as hyperlinks to
-the definition of the symbol.  There is this directive:
-
-.. rst:directive:: .. productionlist:: [productionGroup]
-
-   This directive is used to enclose a group of productions.  Each production
-   is given on a single line and consists of a name, separated by a colon from
-   the following definition.  If the definition spans multiple lines, each
-   continuation line must begin with a colon placed at the same column as in
-   the first line.
+The markup is simple and does not attempt to model all aspects of BNF_
+(or any derived forms), but provides enough to allow context-free grammars
+to be displayed in a way that causes uses of a symbol to be rendered
+as hyperlinks to the definition of the symbol.
+There is this directive:
+
+.. _BNF: https://en.wikipedia.org/wiki/Backus%E2%80%93Naur_form
+
+.. rst:directive:: .. productionlist:: [production_group]
+
+   This directive is used to enclose a group of productions.
+   Each production is given on a single line and consists of a name,
+   separated by a colon from the following definition.
+   If the definition spans multiple lines, each continuation line
+   must begin with a colon placed at the same column as in the first line.
    Blank lines are not allowed within ``productionlist`` directive arguments.
 
-   The definition can contain token names which are marked as interpreted text
-   (e.g., "``sum ::= `integer` "+" `integer```") -- this generates
-   cross-references to the productions of these tokens.  Outside of the
-   production list, you can reference to token productions using
-   :rst:role:`token`.
-
-   The *productionGroup* argument to :rst:dir:`productionlist` serves to
-   distinguish different sets of production lists that belong to different
-   grammars.  Multiple production lists with the same *productionGroup* thus
-   define rules in the same scope.
-
-   Inside of the production list, tokens implicitly refer to productions
-   from the current group. You can refer to the production of another
-   grammar by prefixing the token with its group name and a colon, e.g,
-   "``otherGroup:sum``". If the group of the token should not be shown in
-   the production, it can be prefixed by a tilde, e.g.,
-   "``~otherGroup:sum``". To refer to a production from an unnamed
-   grammar, the token should be prefixed by a colon, e.g., "``:sum``".
-
-   Outside of the production list,
-   if you have given a *productionGroup* argument you must prefix the
-   token name in the cross-reference with the group name and a colon,
-   e.g., "``myGroup:sum``" instead of just "``sum``".
-   If the group should not be shown in the title of the link either
-   an explicit title can be given (e.g., "``myTitle <myGroup:sum>``"),
-   or the target can be prefixed with a tilde (e.g., "``~myGroup:sum``").
-
-   Note that no further reStructuredText parsing is done in the production,
-   so that you don't have to escape ``*`` or ``|`` characters.
+   The optional *production_group* directive argument serves to distinguish
+   different sets of production lists that belong to different grammars.
+   Multiple production lists with the same *production_group*
+   thus define rules in the same scope.
+   This can also be used to split the description of a long or complex grammar
+   across multiple ``productionlist`` directives with the same *production_group*.
+
+   The definition can contain token names which are marked as interpreted text
+   (e.g., "``sum ::= `integer` "+" `integer```"),
+   to generate cross-references to the productions of these tokens.
+   Such cross-references implicitly refer to productions from the current group.
+   To reference a production from another grammar, the token name
+   must be prefixed with the group name and a colon, e.g., "``other-group:sum``".
+   If the group of the token should not be shown in the production,
+   it can be prefixed by a tilde, e.g., "``~other-group:sum``".
+   To refer to a production from an unnamed grammar,
+   the token should be prefixed by a colon, e.g., "``:sum``".
+   No further reStructuredText parsing is done in the production,
+   so that special characters (``*``, ``|``, etc.) do not need to be escaped.
+
+   Token productions can be cross-referenced outside the production list
+   by using the :rst:role:`token` role.
+   If you have used a *production_group* argument,
+   the token name must be prefixed with the group name and a colon,
+   e.g., "``my_group:sum``" instead of just "``sum``".
+   Standard :ref:`cross-referencing modifiers <xref-modifiers>`
+   may be used with the ``:token:`` role,
+   such as custom link text and suppressing the group name with a tilde (``~``).
 
 The following is an example taken from the Python Reference Manual::
 

diff --git a/sphinx/domains/std/__init__.py b/sphinx/domains/std/__init__.py
@@ -2,6 +2,7 @@
 
 from __future__ import annotations
 
+import operator
 import re
 from copy import copy
 from typing import TYPE_CHECKING, cast
@@ -22,7 +23,7 @@
 from sphinx.util.parsing import nested_parse_to_nodes
 
 if TYPE_CHECKING:
-    from collections.abc import Callable, Iterable, Iterator, Set
+    from collections.abc import Callable, Iterable, Iterator, MutableSequence, Set
     from typing import Any, ClassVar, Final
 
     from docutils.nodes import Element, Node, system_message
@@ -553,7 +554,7 @@ def run(self) -> list[Node]:
         return [*messages, node]
 
 
-def token_xrefs(text: str, production_group: str = '') -> list[Node]:
+def token_xrefs(text: str, production_group: str = '') -> Iterable[Node]:
     if len(production_group) != 0:
         production_group += ':'
     retnodes: list[Node] = []
@@ -596,43 +597,107 @@ class ProductionList(SphinxDirective):
     final_argument_whitespace = True
     option_spec: ClassVar[OptionSpec] = {}
 
+    # The backslash handling is from ObjectDescription.get_signatures
+    _nl_escape_re: Final = re.compile(r'\\\n')
+
+    # Get 'name' from triples of rawsource, name, definition (tokens)
+    _name_getter = operator.itemgetter(1)
+
     def run(self) -> list[Node]:
-        domain = self.env.domains.standard_domain
-        node: Element = addnodes.productionlist()
+        name_getter = self._name_getter
+        lines = self._nl_escape_re.sub('', self.arguments[0]).splitlines()
+
+        # Extract production_group argument.
+        # Must be before extracting production definition triples.
+        production_group = self.production_group(lines=lines, options=self.options)
+        production_lines = list(self.production_definitions(lines))
+        max_name_len = max(map(len, map(name_getter, production_lines)), default=0)
+        node_location = self.get_location()
+
+        productions = [
+            self.make_production(
+                rawsource=rule,
+                name=name,
+                tokens=tokens,
+                production_group=production_group,
+                max_len=max_name_len,
+                location=node_location,
+            )
+            for rule, name, tokens in production_lines
+        ]
+        node = addnodes.productionlist('', *productions)
         self.set_source_info(node)
-        # The backslash handling is from ObjectDescription.get_signatures
-        nl_escape_re = re.compile(r'\\\n')
-        lines = nl_escape_re.sub('', self.arguments[0]).split('\n')
-
-        production_group = ''
-        first_rule_seen = False
-        for rule in lines:
-            if not first_rule_seen and ':' not in rule:
-                production_group = rule.strip()
-                continue
-            first_rule_seen = True
-            try:
-                name, tokens = rule.split(':', 1)
-            except ValueError:
-                break
-            subnode = addnodes.production(rule)
-            name = name.strip()
-            subnode['tokenname'] = name
-            if subnode['tokenname']:
-                prefix = 'grammar-token-%s' % production_group
-                node_id = make_id(self.env, self.state.document, prefix, name)
-                subnode['ids'].append(node_id)
-                self.state.document.note_implicit_target(subnode, subnode)
-
-                if len(production_group) != 0:
-                    obj_name = f'{production_group}:{name}'
-                else:
-                    obj_name = name
-                domain.note_object('token', obj_name, node_id, location=node)
-            subnode.extend(token_xrefs(tokens, production_group=production_group))
-            node.append(subnode)
         return [node]
 
+    @staticmethod
+    def production_group(
+        *,
+        lines: MutableSequence[str],
+        options: dict[str, Any],  # NoQA: ARG004
+    ) -> str:
+        # get production_group
+        if not lines or ':' in lines[0]:
+            return ''
+        production_group = lines[0].strip()
+        lines[:] = lines[1:]
+        return production_group
+
+    @staticmethod
+    def production_definitions(
+        lines: Iterable[str], /
+    ) -> Iterator[tuple[str, str, str]]:
+        """Yield triples of rawsource, name, definition (tokens)."""
+        for line in lines:
+            if ':' not in line:
+                break
+            name, _, tokens = line.partition(':')
+            yield line, name.strip(), tokens.strip()
+
+    def make_production(
+        self,
+        *,
+        rawsource: str,
+        name: str,
+        tokens: str,
+        production_group: str,
+        max_len: int,
+        location: str,
+    ) -> addnodes.production:
+        production_node = addnodes.production(rawsource, tokenname=name)
+        if name:
+            production_node += self.make_name_target(
+                name=name, production_group=production_group, location=location
+            )
+        production_node.append(self.separator_node(name=name, max_len=max_len))
+        production_node += token_xrefs(text=tokens, production_group=production_group)
+        production_node.append(nodes.Text('\n'))
+        return production_node
+
+    def make_name_target(
+        self,
+        *,
+        name: str,
+        production_group: str,
+        location: str,
+    ) -> addnodes.literal_strong:
+        """Make a link target for the given production."""
+        name_node = addnodes.literal_strong(name, name)
+        prefix = f'grammar-token-{production_group}'
+        node_id = make_id(self.env, self.state.document, prefix, name)
+        name_node['ids'].append(node_id)
+        self.state.document.note_implicit_target(name_node, name_node)
+        obj_name = f'{production_group}:{name}' if production_group else name
+        std = self.env.domains.standard_domain
+        std.note_object('token', obj_name, node_id, location=location)
+        return name_node
+
+    @staticmethod
+    def separator_node(*, name: str, max_len: int) -> nodes.Text:
+        """Return separator between 'name' and 'tokens'."""
+        if name:
+            return nodes.Text(' ::= '.rjust(max_len - len(name) + 5))
+        return nodes.Text(' ' * (max_len + 5))  # no name: pad continuation line
+
 
 class TokenXRefRole(XRefRole):
     def process_link(

diff --git a/sphinx/texinputs/sphinxlatexobjects.sty b/sphinx/texinputs/sphinxlatexobjects.sty
@@ -1,7 +1,7 @@
 %% MODULE RELEASE DATA AND OBJECT DESCRIPTIONS
 %
 % change this info string if making any custom modification
-\ProvidesPackage{sphinxlatexobjects}[2023/07/23 documentation environments]
+\ProvidesPackage{sphinxlatexobjects}[2025/02/11 documentation environments]
 
 % Provides support for this output mark-up from Sphinx latex writer:
 %
@@ -279,18 +279,37 @@
 \newcommand{\pysigstopmultiline}{\sphinxsigismultilinefalse\itemsep\sphinxsignaturesep}%
 
 % Production lists
+% This simply outputs the lines as is, in monospace font.  Refers to #13326.
+% (the left padding for multi-line alignment is from the nodes themselves,
+%  and latex is configured below to obey such horizontal whitespace).
+%
+% - The legacy code used longtable and hardcoded the separator as ::=
+%   via dedicated macros defined by the environment itself.
+% - Here the separator is part of the node.  Any extra LaTeX mark-up would
+%   have to originate from the writer itself to decorate it.
+% - The legacy code strangely used \parindent and \indent.  Possibly
+%   (unchecked) due to an earlier tabular usage, but a longtable does not
+%   work in paragraph mode, so \parindent was without effect and
+%   \indent only caused some extra blank line above display.
+% - The table had some whitespace on its left, which we imitate here via
+%   \parindent usage (which works in our context...).
 %
 \newenvironment{productionlist}{%
-%  \def\sphinxoptional##1{{\Large[}##1{\Large]}}
-  \def\production##1##2{\\\sphinxcode{\sphinxupquote{##1}}&::=&\sphinxcode{\sphinxupquote{##2}}}%
-  \def\productioncont##1{\\& &\sphinxcode{\sphinxupquote{##1}}}%
-  \parindent=2em
-  \indent
-  \setlength{\LTpre}{0pt}%
-  \setlength{\LTpost}{0pt}%
-  \begin{longtable}[l]{lcl}
+  \bigskip      % imitate close enough legacy vertical whitespace, which was
+                % visibly excessive
+  \ttfamily     % needed for space tokens to have same width as letters
+  \parindent1em % width of a "quad", font-dependent, usually circa width of 2
+                % letters
+  \obeylines    % line in = line out
+  \parskip\z@skip % prevent the parskip vertical whitespace between lines,
+                  % which are technically to LaTeX now each its own paragraph
+  \@vobeyspaces % obey whitespace
+  % now a technicality to, only locally to this environment, prevent the
+  % suppression of indentation of first line, if it comes right after
+  % \section. Cf package indentfirst from which the code is borrowed.
+  \let\@afterindentfalse\@afterindenttrue\@afterindenttrue
 }{%
-  \end{longtable}
+  \par % does not hurt...
 }
 
 % Definition lists; requested by AMK for HOWTO documents.  Probably useful

diff --git a/sphinx/writers/html5.py b/sphinx/writers/html5.py
@@ -5,7 +5,7 @@
 import posixpath
 import re
 import urllib.parse
-from typing import TYPE_CHECKING, cast
+from typing import TYPE_CHECKING
 
 from docutils import nodes
 from docutils.writers.html5_polyglot import HTMLTranslator as BaseTranslator
@@ -17,8 +17,6 @@
 from sphinx.util.images import get_image_size
 
 if TYPE_CHECKING:
-    from collections.abc import Iterable
-
     from docutils.nodes import Element, Node, Text
 
     from sphinx.builders import Builder
@@ -695,23 +693,9 @@ def depart_literal(self, node: Element) -> None:
 
     def visit_productionlist(self, node: Element) -> None:
         self.body.append(self.starttag(node, 'pre'))
-        productionlist = cast('Iterable[addnodes.production]', node)
-        maxlen = max(len(production['tokenname']) for production in productionlist)
-        lastname = None
-        for production in productionlist:
-            if production['tokenname']:
-                lastname = production['tokenname'].ljust(maxlen)
-                self.body.append(self.starttag(production, 'strong', ''))
-                self.body.append(lastname + '</strong> ::= ')
-            elif lastname is not None:
-                self.body.append(' ' * (maxlen + 5))
-            production.walkabout(self)
-            self.body.append('\n')
-        self.body.append('</pre>\n')
-        raise nodes.SkipNode
 
     def depart_productionlist(self, node: Element) -> None:
-        pass
+        self.body.append('</pre>\n')
 
     def visit_production(self, node: Element) -> None:
         pass