Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions docs/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,16 @@ See the [Contributing Guide](contributing.md) for details.

## [Unreleased]

### Changed

* Footnotes are now ordered by the occurrence of their references in the
document. A new configuration option for the footnotes extension,
`USE_DEFINITION_ORDER`, has been added to support restoring the previous
behavior of ordering footnotes by the occurrence of definitions.

### Fixed

* Ensure inline processing iterates through elements in document order.
* Fix handling of incomplete HTML tags in code spans in Python 3.14.

## [3.8.2] - 2025-06-19
Expand Down
52 changes: 34 additions & 18 deletions docs/extensions/footnotes.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,26 +24,33 @@ the output.
Example:

```md
Footnotes[^1] have a label[^@#$%] and the footnote's content.
Footnotes have a name, a reference[^1], and a definition[^word].

[^1]: This is a footnote content.
[^@#$%]: A footnote on the label: "@#$%".
[^1]: This is a footnote definition.
[^word]: A footnote with the name "word".
```

A footnote label must start with a caret `^` and may contain any inline text
(including spaces) between a set of square brackets `[]`. Only the first
caret has any special meaning.

A footnote content must start with the label followed by a colon and at least
one space. The label used to define the content must exactly match the label used
in the body (including capitalization and white space). The content would then
follow the label either on the same line or on the next line. The content may
contain multiple lines, paragraphs, code blocks, blockquotes and most any other
markdown syntax. The additional lines must be indented one level (four spaces or
one tab).

When working with multiple blocks, it may be helpful to start the content on a
separate line from the label which defines the content. This way the entire block
A **footnote name** is a string that uniquely identifies a footnote within the
document. It may contain any character other than a closing square bracket `]`
(including spaces). Examples: `1` in `[^1]`, `word` in `[^word]`,
and `@#$%` in `[^@#$%]`.

A **footnote reference** is a link within the text body to a footnote definition.
A footnote reference contains the footnote name prefixed by a caret `^` and enclosed
in square brackets `[]`. Examples: `[^1]` and `[^@#$%]`. In the output, footnote
references are replaced by a superscript number that links to the footnote definition.

A **footnote definition** must start with the corresponding footnote reference
followed by a colon and at least one space. The reference must exactly match
the reference used in the body (including capitalization and white space).
The content of the definition would then follow either on the same line
(`[^1]: This is a footnote definition.`) or on the next line.
Footnote definitions may contain multiple lines, paragraphs, code blocks,
blockquotes and most any other markdown syntax. The additional lines must be
indented one level (four spaces or one tab).

When working with multiple blocks, it may be helpful to start the definition on a
separate line from the reference which defines the content. This way the entire block
is indented consistently and any errors are more easily discernible by the author.

```md
Expand Down Expand Up @@ -98,6 +105,15 @@ The following options are provided to configure the output:
* **`SEPARATOR`**:
The text string used to set the footnote separator. Defaults to `:`.

* **`USE_DEFINITION_ORDER`**:
Whether to order footnotes by the occurrence of footnote definitions
in the document. Defaults to `False`.

Introduced in version 3.9.0, this option allows footnotes to be ordered
by the occurrence of their definitions in the document, rather than by the
order of their references in the text. Ordering by definition was the
behavior of versions prior to 3.9.0.

A trivial example:

```python
Expand All @@ -109,7 +125,7 @@ Resetting Instance State

Footnote definitions are stored within the `markdown.Markdown` class instance between
multiple runs of the class. This allows footnotes from all runs to be included in
output, with links and references that are unique, even though the class has been
output, with links and references that are unique, even though the class has been
called multiple times.

However, if needed, the definitions can be cleared between runs by calling `reset`.
Expand Down
78 changes: 70 additions & 8 deletions markdown/extensions/footnotes.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
FN_BACKLINK_TEXT = util.STX + "zz1337820767766393qq" + util.ETX
NBSP_PLACEHOLDER = util.STX + "qq3936677670287331zz" + util.ETX
RE_REF_ID = re.compile(r'(fnref)(\d+)')
RE_REFERENCE = re.compile(r'(?<!!)\[\^([^\]]*)\](?!\s*:)')


class FootnoteExtension(Extension):
Expand Down Expand Up @@ -61,6 +62,9 @@ def __init__(self, **kwargs):
],
'SEPARATOR': [
':', 'Footnote separator.'
],
'USE_DEFINITION_ORDER': [
False, 'Whether to order footnotes by footnote content rather than by footnote label.'
]
}
""" Default configuration options. """
Expand All @@ -71,6 +75,9 @@ def __init__(self, **kwargs):
self.found_refs: dict[str, int] = {}
self.used_refs: set[str] = set()

# Backward compatibility with old '%d' placeholder
self.setConfig('BACKLINK_TITLE', self.getConfig("BACKLINK_TITLE").replace("%d", "{}"))

self.reset()

def extendMarkdown(self, md):
Expand All @@ -89,6 +96,12 @@ def extendMarkdown(self, md):
# `codehilite`) so they can run on the contents of the div.
md.treeprocessors.register(FootnoteTreeprocessor(self), 'footnote', 50)

# Insert a tree-processor to reorder the footnotes if necessary. This must run after the
# `inline` tree-processor so it can access the footnote reference order
# (`self.footnote_order`) that gets populated by the `FootnoteInlineProcessor`.
if not self.getConfig("USE_DEFINITION_ORDER"):
md.treeprocessors.register(FootnoteReorderingProcessor(self), 'footnote-reorder', 19)

# Insert a tree-processor that will run after inline is done.
# In this tree-processor we want to check our duplicate footnote tracker
# And add additional `backrefs` to the footnote pointing back to the
Expand All @@ -100,6 +113,7 @@ def extendMarkdown(self, md):

def reset(self) -> None:
""" Clear footnotes on reset, and prepare for distinct document. """
self.footnote_order: list[str] = []
self.footnotes: OrderedDict[str, str] = OrderedDict()
self.unique_prefix += 1
self.found_refs = {}
Expand Down Expand Up @@ -150,6 +164,11 @@ def setFootnote(self, id: str, text: str) -> None:
""" Store a footnote for later retrieval. """
self.footnotes[id] = text

def addFootnoteRef(self, id: str) -> None:
    """ Record a footnote reference id the first time it is encountered.

    Ids are appended to `self.footnote_order`, so the list reflects the order
    in which references were first seen; repeated ids are ignored.
    """
    already_seen = id in self.footnote_order
    if already_seen:
        return
    self.footnote_order.append(id)

def get_separator(self) -> str:
    """ Return the value of the `SEPARATOR` configuration option. """
    separator = self.getConfig("SEPARATOR")
    return separator
Expand Down Expand Up @@ -180,9 +199,6 @@ def makeFootnotesDiv(self, root: etree.Element) -> etree.Element | None:
ol = etree.SubElement(div, "ol")
surrogate_parent = etree.Element("div")

# Backward compatibility with old '%d' placeholder
backlink_title = self.getConfig("BACKLINK_TITLE").replace("%d", "{}")

for index, id in enumerate(self.footnotes.keys(), start=1):
li = etree.SubElement(ol, "li")
li.set("id", self.makeFootnoteId(id))
Expand All @@ -198,7 +214,7 @@ def makeFootnotesDiv(self, root: etree.Element) -> etree.Element | None:
backlink.set("class", "footnote-backref")
backlink.set(
"title",
backlink_title.format(index)
self.getConfig('BACKLINK_TITLE').format(index)
)
backlink.text = FN_BACKLINK_TEXT

Expand All @@ -214,7 +230,7 @@ def makeFootnotesDiv(self, root: etree.Element) -> etree.Element | None:


class FootnoteBlockProcessor(BlockProcessor):
""" Find all footnote references and store for later use. """
""" Find footnote definitions and store for later use. """

RE = re.compile(r'^[ ]{0,3}\[\^([^\]]*)\]:[ ]*(.*)$', re.MULTILINE)

Expand All @@ -228,6 +244,7 @@ def test(self, parent: etree.Element, block: str) -> bool:
def run(self, parent: etree.Element, blocks: list[str]) -> bool:
""" Find, set, and remove footnote definitions. """
block = blocks.pop(0)

m = self.RE.search(block)
if m:
id = m.group(1)
Expand Down Expand Up @@ -312,14 +329,21 @@ def __init__(self, pattern: str, footnotes: FootnoteExtension):
def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element | None, int | None, int | None]:
id = m.group(1)
if id in self.footnotes.footnotes.keys():
self.footnotes.addFootnoteRef(id)

if not self.footnotes.getConfig("USE_DEFINITION_ORDER"):
# Order by reference
footnote_num = self.footnotes.footnote_order.index(id) + 1
else:
# Order by definition
footnote_num = list(self.footnotes.footnotes.keys()).index(id) + 1

sup = etree.Element("sup")
a = etree.SubElement(sup, "a")
sup.set('id', self.footnotes.makeFootnoteRefId(id, found=True))
a.set('href', '#' + self.footnotes.makeFootnoteId(id))
a.set('class', 'footnote-ref')
a.text = self.footnotes.getConfig("SUPERSCRIPT_TEXT").format(
list(self.footnotes.footnotes.keys()).index(id) + 1
)
a.text = self.footnotes.getConfig("SUPERSCRIPT_TEXT").format(footnote_num)
return sup, m.start(0), m.end(0)
else:
return None, None, None
Expand Down Expand Up @@ -401,6 +425,44 @@ def run(self, root: etree.Element) -> None:
root.append(footnotesDiv)


class FootnoteReorderingProcessor(Treeprocessor):
    """ Reorder list items in the footnotes div.

    Sorts the `<li>` elements of the footnotes `<ol>` into the order recorded
    in the extension's `footnote_order` list and rewrites each back-link
    title so its number matches the item's new position.
    """

    def __init__(self, footnotes: FootnoteExtension):
        # The extension instance supplies `footnotes`, `footnote_order` and
        # the configuration values read while reordering.
        self.footnotes = footnotes

    def run(self, root: etree.Element) -> None:
        """ Reorder the first `div` with class `footnote` found under `root`.

        Does nothing when no footnotes are stored or when the reference order
        already equals the definition order.
        """
        if not self.footnotes.footnotes:
            return
        if self.footnotes.footnote_order == list(self.footnotes.footnotes.keys()):
            return
        for div in root.iter('div'):
            if div.attrib.get('class', '') == 'footnote':
                self.reorder_footnotes(div)
                break

    def reorder_footnotes(self, parent: etree.Element) -> None:
        """ Replace the `<ol>` under `parent` with one sorted by reference order.

        Items whose id does not correspond to a referenced footnote are placed
        after all referenced ones, keeping their existing relative order.
        """
        old_list = parent.find('ol')
        if old_list is None:
            # No list to reorder; leave the div untouched.
            return
        parent.remove(old_list)
        items = old_list.findall('li')

        # Map each expected `<li>` id to its position in reference order. The
        # ids are produced with `makeFootnoteId` (the same call used when the
        # list items are created) instead of being parsed back out of the
        # attribute, so the lookup does not depend on the exact id layout.
        # NOTE(review): assumes `makeFootnoteId` returns the same value for
        # the same name throughout a single run — confirm.
        rank = {
            self.footnotes.makeFootnoteId(name): position
            for position, name in enumerate(self.footnotes.footnote_order)
        }
        unreferenced_rank = len(self.footnotes.footnotes)

        def order_by_id(li) -> int:
            return rank.get(li.attrib.get('id', ''), unreferenced_rank)

        # `sorted` is stable, so items sharing a rank keep their prior order.
        items = sorted(items, key=order_by_id)

        new_list = etree.SubElement(parent, 'ol')
        backlink_title = self.footnotes.getConfig("BACKLINK_TITLE")

        for index, item in enumerate(items, start=1):
            backlink = item.find('.//a[@class="footnote-backref"]')
            # An item without a back-link has no title to renumber.
            if backlink is not None:
                backlink.set("title", backlink_title.format(index))
            new_list.append(item)


class FootnotePostprocessor(Postprocessor):
""" Replace placeholders with html entities. """
def __init__(self, footnotes: FootnoteExtension):
Expand Down
2 changes: 1 addition & 1 deletion markdown/treeprocessors.py
Original file line number Diff line number Diff line change
Expand Up @@ -368,7 +368,7 @@ def run(self, tree: etree.Element, ancestors: list[str] | None = None) -> etree.
stack = [(tree, tree_parents)]

while stack:
currElement, parents = stack.pop()
currElement, parents = stack.pop(0)

self.ancestors = parents
self.__build_ancestors(currElement, self.ancestors)
Expand Down
Loading