diff --git a/docs/changelog.md b/docs/changelog.md index 3d54e6e2..7afa81bb 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -12,8 +12,16 @@ See the [Contributing Guide](contributing.md) for details. ## [Unreleased] +### Changed + +* Footnotes are now ordered by the occurrence of their references in the + document. A new configuration option for the footnotes extension, + `USE_DEFINITION_ORDER`, has been added to support restoring the previous + behavior of ordering footnotes by the occurrence of definitions. + ### Fixed +* Ensure inline processing iterates through elements in document order. * Fix handling of incomplete HTML tags in code spans in Python 3.14. ## [3.8.2] - 2025-06-19 diff --git a/docs/extensions/footnotes.md b/docs/extensions/footnotes.md index e841a324..7d033478 100644 --- a/docs/extensions/footnotes.md +++ b/docs/extensions/footnotes.md @@ -24,26 +24,33 @@ the output. Example: ```md -Footnotes[^1] have a label[^@#$%] and the footnote's content. +Footnotes have a name, a reference[^1], and a definition[^word]. -[^1]: This is a footnote content. -[^@#$%]: A footnote on the label: "@#$%". +[^1]: This is a footnote definition. +[^word]: A footnote with the name "word". ``` -A footnote label must start with a caret `^` and may contain any inline text -(including spaces) between a set of square brackets `[]`. Only the first -caret has any special meaning. - -A footnote content must start with the label followed by a colon and at least -one space. The label used to define the content must exactly match the label used -in the body (including capitalization and white space). The content would then -follow the label either on the same line or on the next line. The content may -contain multiple lines, paragraphs, code blocks, blockquotes and most any other -markdown syntax. The additional lines must be indented one level (four spaces or -one tab). 
- -When working with multiple blocks, it may be helpful to start the content on a -separate line from the label which defines the content. This way the entire block +A **footnote name** is a string that uniquely identifies a footnote within the +document. It may contain any character except a closing square bracket `]` +(including spaces). Examples: `1` in `[^1]`, `word` in `[^word]`, +and `@#$%` in `[^@#$%]`. + +A **footnote reference** is a link within the text body to a footnote definition. +A footnote reference contains the footnote name prefixed by a caret `^` and enclosed +in square brackets `[]`. Examples: `[^1]` and `[^@#$%]`. In the output, footnote +references are replaced by a superscript number that links to the footnote definition. + +A **footnote definition** must start with the corresponding footnote reference +followed by a colon and at least one space. The reference must exactly match +the reference used in the body (including capitalization and white space). +The content of the definition then follows either on the same line +(`[^1]: This is a footnote definition.`) or on the next line. +Footnote definitions may contain multiple lines, paragraphs, code blocks, +blockquotes and almost any other Markdown syntax. The additional lines must be +indented one level (four spaces or one tab). + +When working with multiple blocks, it may be helpful to start the definition on a +separate line from the reference which defines the content. This way the entire block is indented consistently and any errors are more easily discernible by the author. ```md @@ -98,6 +105,15 @@ The following options are provided to configure the output: * **`SEPARATOR`**: The text string used to set the footnote separator. Defaults to `:`. +* **`USE_DEFINITION_ORDER`**: + Whether to order footnotes by the occurrence of footnote definitions + in the document. Defaults to `False`. 
+ + Introduced in version 3.9.0, this option allows footnotes to be ordered + by the occurrence of their definitions in the document, rather than by the + order of their references in the text. This was the behavior of + previous versions of the extension. + A trivial example: ```python @@ -109,7 +125,7 @@ Resetting Instance State Footnote definitions are stored within the `markdown.Markdown` class instance between multiple runs of the class. This allows footnotes from all runs to be included in -output, with links and references that are unique, even though the class has been +output, with links and references that are unique, even though the class has been called multiple times. However, if needed, the definitions can be cleared between runs by calling `reset`. diff --git a/markdown/extensions/footnotes.py b/markdown/extensions/footnotes.py index 30c08113..13ecf7c2 100644 --- a/markdown/extensions/footnotes.py +++ b/markdown/extensions/footnotes.py @@ -33,6 +33,7 @@ FN_BACKLINK_TEXT = util.STX + "zz1337820767766393qq" + util.ETX NBSP_PLACEHOLDER = util.STX + "qq3936677670287331zz" + util.ETX RE_REF_ID = re.compile(r'(fnref)(\d+)') +RE_REFERENCE = re.compile(r'(? None: """ Clear footnotes on reset, and prepare for distinct document. """ + self.footnote_order: list[str] = [] self.footnotes: OrderedDict[str, str] = OrderedDict() self.unique_prefix += 1 self.found_refs = {} @@ -150,6 +164,11 @@ def setFootnote(self, id: str, text: str) -> None: """ Store a footnote for later retrieval. """ self.footnotes[id] = text + def addFootnoteRef(self, id: str) -> None: + """ Store a footnote reference id in order of appearance. """ + if id not in self.footnote_order: + self.footnote_order.append(id) + def get_separator(self) -> str: """ Get the footnote separator. 
""" return self.getConfig("SEPARATOR") @@ -180,9 +199,6 @@ def makeFootnotesDiv(self, root: etree.Element) -> etree.Element | None: ol = etree.SubElement(div, "ol") surrogate_parent = etree.Element("div") - # Backward compatibility with old '%d' placeholder - backlink_title = self.getConfig("BACKLINK_TITLE").replace("%d", "{}") - for index, id in enumerate(self.footnotes.keys(), start=1): li = etree.SubElement(ol, "li") li.set("id", self.makeFootnoteId(id)) @@ -198,7 +214,7 @@ def makeFootnotesDiv(self, root: etree.Element) -> etree.Element | None: backlink.set("class", "footnote-backref") backlink.set( "title", - backlink_title.format(index) + self.getConfig('BACKLINK_TITLE').format(index) ) backlink.text = FN_BACKLINK_TEXT @@ -214,7 +230,7 @@ def makeFootnotesDiv(self, root: etree.Element) -> etree.Element | None: class FootnoteBlockProcessor(BlockProcessor): - """ Find all footnote references and store for later use. """ + """ Find footnote definitions and store for later use. """ RE = re.compile(r'^[ ]{0,3}\[\^([^\]]*)\]:[ ]*(.*)$', re.MULTILINE) @@ -228,6 +244,7 @@ def test(self, parent: etree.Element, block: str) -> bool: def run(self, parent: etree.Element, blocks: list[str]) -> bool: """ Find, set, and remove footnote definitions. 
""" block = blocks.pop(0) + m = self.RE.search(block) if m: id = m.group(1) @@ -312,14 +329,21 @@ def __init__(self, pattern: str, footnotes: FootnoteExtension): def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element | None, int | None, int | None]: id = m.group(1) if id in self.footnotes.footnotes.keys(): + self.footnotes.addFootnoteRef(id) + + if not self.footnotes.getConfig("USE_DEFINITION_ORDER"): + # Order by reference + footnote_num = self.footnotes.footnote_order.index(id) + 1 + else: + # Order by definition + footnote_num = list(self.footnotes.footnotes.keys()).index(id) + 1 + sup = etree.Element("sup") a = etree.SubElement(sup, "a") sup.set('id', self.footnotes.makeFootnoteRefId(id, found=True)) a.set('href', '#' + self.footnotes.makeFootnoteId(id)) a.set('class', 'footnote-ref') - a.text = self.footnotes.getConfig("SUPERSCRIPT_TEXT").format( - list(self.footnotes.footnotes.keys()).index(id) + 1 - ) + a.text = self.footnotes.getConfig("SUPERSCRIPT_TEXT").format(footnote_num) return sup, m.start(0), m.end(0) else: return None, None, None @@ -401,6 +425,44 @@ def run(self, root: etree.Element) -> None: root.append(footnotesDiv) +class FootnoteReorderingProcessor(Treeprocessor): + """ Reorder list items in the footnotes div. 
""" + + def __init__(self, footnotes: FootnoteExtension): + self.footnotes = footnotes + + def run(self, root: etree.Element) -> None: + if not self.footnotes.footnotes: + return + if self.footnotes.footnote_order != list(self.footnotes.footnotes.keys()): + for div in root.iter('div'): + if div.attrib.get('class', '') == 'footnote': + self.reorder_footnotes(div) + break + + def reorder_footnotes(self, parent: etree.Element) -> None: + old_list = parent.find('ol') + parent.remove(old_list) + items = old_list.findall('li') + + def order_by_id(li) -> int: + id = li.attrib.get('id', '').split(self.footnotes.get_separator(), 1)[-1] + return ( + self.footnotes.footnote_order.index(id) + if id in self.footnotes.footnote_order + else len(self.footnotes.footnotes) + ) + + items = sorted(items, key=order_by_id) + + new_list = etree.SubElement(parent, 'ol') + + for index, item in enumerate(items, start=1): + backlink = item.find('.//a[@class="footnote-backref"]') + backlink.set("title", self.footnotes.getConfig("BACKLINK_TITLE").format(index)) + new_list.append(item) + + class FootnotePostprocessor(Postprocessor): """ Replace placeholders with html entities. """ def __init__(self, footnotes: FootnoteExtension): diff --git a/markdown/treeprocessors.py b/markdown/treeprocessors.py index 83630999..9a27446d 100644 --- a/markdown/treeprocessors.py +++ b/markdown/treeprocessors.py @@ -368,7 +368,7 @@ def run(self, tree: etree.Element, ancestors: list[str] | None = None) -> etree. 
stack = [(tree, tree_parents)] while stack: - currElement, parents = stack.pop() + currElement, parents = stack.pop(0) self.ancestors = parents self.__build_ancestors(currElement, self.ancestors) diff --git a/tests/test_syntax/extensions/test_footnotes.py b/tests/test_syntax/extensions/test_footnotes.py index 6f504e39..070fa27f 100644 --- a/tests/test_syntax/extensions/test_footnotes.py +++ b/tests/test_syntax/extensions/test_footnotes.py @@ -336,3 +336,342 @@ def test_superscript_text(self): '', extension_configs={'footnotes': {'SUPERSCRIPT_TEXT': '[{}]'}} ) + + def test_footnote_order(self): + """Test that footnotes occur in order of reference appearance.""" + + self.assertMarkdownRenders( + self.dedent( + """ + First footnote reference[^first]. Second footnote reference[^last]. + + [^last]: Second footnote. + [^first]: First footnote. + """ + ), + '
First footnote reference1. Second footnote reference' + '2.
\n' + 'Footnote reference in code spans should be ignored[^tricky]
.\n'
+ 'A footnote reference'
+ '1.\n'
+ 'Another footnote reference'
+ '2.
First footnote reference2. Second footnote reference' + '1.
\n' + 'A code span with a footnote[^1] reference
.
A link with a footnote[^1] reference.
' + ) + + def test_footnote_reference_within_footnote_definition(self): + """Test footnote definition containing another footnote reference.""" + + self.assertMarkdownRenders( + self.dedent( + """ + Main footnote[^main]. + + [^main]: This footnote references another[^nested]. + [^nested]: Nested footnote. + """ + ), + 'Main footnote1.
\n' + '\n' + '\n' + 'This is a quote with a footnote' + '1.
\n' + '
Quote footnote. ↩
\n' + 'List footnote. ↩
\n' + 'A footnote reference' + '1' + ' within a span element.
\n' + 'The footnote. ↩
\n' + 'First' + '1 and second' + '1 reference.
\n' + '' + ) + + def test_footnote_reference_without_definition(self): + """Test footnote reference without corresponding definition.""" + + self.assertMarkdownRenders( + 'This has a missing footnote[^missing].', + 'This has a missing footnote[^missing].
' + ) + + def test_footnote_definition_without_reference(self): + """Test footnote definition without corresponding reference.""" + + self.assertMarkdownRenders( + self.dedent( + """ + No reference here. + + [^orphan]: Orphaned footnote. + """ + ), + 'No reference here.
\n' + 'Orphaned footnote. ↩
\n' + 'Special footnote id' + '1.
\n' + 'The footnote. ↩
\n' + '