Skip to content

Commit 4334dee

Browse files
committed
Use set instead of list for block level elements
Using a set gives better performance when checking whether a tag is one of the block-level elements. Issue-1507: #1507
1 parent 6347c57 commit 4334dee

File tree

4 files changed

+61
-7
lines changed

4 files changed

+61
-7
lines changed

markdown/core.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ class Markdown:
5050
Attributes:
5151
Markdown.tab_length (int): The number of spaces which correspond to a single tab. Default: `4`.
5252
Markdown.ESCAPED_CHARS (list[str]): List of characters which get the backslash escape treatment.
53-
Markdown.block_level_elements (list[str]): List of HTML tags which get treated as block-level elements.
53+
Markdown.block_level_elements (set[str]): Set of HTML tags which get treated as block-level elements.
5454
See [`markdown.util.BLOCK_LEVEL_ELEMENTS`][] for the full list of elements.
5555
Markdown.registeredExtensions (list[Extension]): List of extensions which have called
5656
[`registerExtension`][markdown.Markdown.registerExtension] during setup.
@@ -113,7 +113,7 @@ def __init__(self, **kwargs):
113113
]
114114
""" List of characters which get the backslash escape treatment. """
115115

116-
self.block_level_elements: list[str] = BLOCK_LEVEL_ELEMENTS.copy()
116+
self.block_level_elements: set[str] = BLOCK_LEVEL_ELEMENTS.copy()
117117

118118
self.registeredExtensions: list[Extension] = []
119119
self.docType = "" # TODO: Maybe delete this. It does not appear to be used anymore.

markdown/extensions/md_in_html.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ class HTMLExtractorExtra(HTMLExtractor):
4242

4343
def __init__(self, md: Markdown, *args, **kwargs):
4444
# All block-level tags.
45-
self.block_level_tags = set(md.block_level_elements.copy())
45+
self.block_level_tags = md.block_level_elements.copy()
4646
# Block-level tags in which the content only gets span level parsing
4747
self.span_tags = set(
4848
['address', 'dd', 'dt', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'legend', 'li', 'p', 'summary', 'td', 'th']

markdown/util.py

Lines changed: 57 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424

2525
from __future__ import annotations
2626

27+
from collections.abc import Callable
2728
import re
2829
import sys
2930
import warnings
@@ -44,7 +45,60 @@
4445
"""
4546

4647

47-
BLOCK_LEVEL_ELEMENTS: list[str] = [
48+
class _BlockLevelElements(set):
    """A `set` of tag names that still answers to the old `list` methods.

    `BLOCK_LEVEL_ELEMENTS` used to be a `list`. This subclass keeps the
    list-style methods (`append`, `extend`, `insert`, ...) working on top of a
    `set` so existing callers keep running, while emitting a
    `DeprecationWarning` for every list-only usage. For now, failures raise
    the exception a `list` would raise (`IndexError` / `ValueError`) rather
    than the `KeyError` a plain `set` raises.
    """

    # ----------------------------------
    # Methods common to `list` and `set`
    # ----------------------------------
    def copy(self) -> set[str]:
        """Return a shallow copy that keeps the compatibility methods."""
        return _BlockLevelElements(super().copy())

    def pop(self, index: int | None = None, /) -> str:
        """Remove and return an arbitrary element.

        Arguments:
            index: Accepted for `list` compatibility only. A set is unordered,
                so the value is ignored and a deprecation warning is issued.

        Raises:
            IndexError: If the set is empty (mirrors `list.pop`).
        """
        if index is not None:
            warnings.warn(
                "The index argument is deprecated and will be removed in the future",
                DeprecationWarning,
                stacklevel=2,
            )
        try:
            # Must delegate to `set.pop`; calling `self.pop()` here would
            # recurse into this override forever.
            return super().pop()
        except KeyError:
            warnings.warn(
                "`pop` will raise a `KeyError` in the future",
                DeprecationWarning,
                stacklevel=2,
            )
            raise IndexError("pop from an empty set") from None

    def remove(self, element: object) -> None:
        """Remove `element` from the set.

        Raises:
            ValueError: If `element` is not present (mirrors `list.remove`).
        """
        try:
            # Must delegate to `set.remove`; calling `self.remove()` here
            # would recurse into this override forever.
            super().remove(element)
        except KeyError:
            warnings.warn(
                "`remove` will raise a `KeyError` in the future",
                DeprecationWarning,
                stacklevel=2,
            )
            raise ValueError(f"{element!r} not in set") from None

    # --------------------------
    # Methods specific to `list`
    # --------------------------
    def append(self, element: str) -> None:
        """Deprecated `list.append` stand-in; adds `element` to the set."""
        warnings.warn("method `append` will be removed in the future", DeprecationWarning, stacklevel=2)
        self.add(element)

    def count(self, value: str) -> int:
        """Deprecated `list.count` stand-in; a set holds a value at most once."""
        warnings.warn("method `count` will be removed in the future", DeprecationWarning, stacklevel=2)
        return 1 if value in self else 0

    def extend(self, elements: list[str]) -> None:
        """Deprecated `list.extend` stand-in; adds every item of `elements`."""
        warnings.warn("method `extend` will be removed in the future", DeprecationWarning, stacklevel=2)
        self.update(elements)

    def index(self, value, start=0, stop=0, /) -> int:
        """Deprecated `list.index` stand-in.

        A set has no positions, so `start` and `stop` are ignored. Returns `0`
        when `value` is present and `-1` otherwise.
        """
        # NOTE(review): `list.index` raises `ValueError` for a missing value,
        # whereas this returns -1. Kept as-is to avoid changing what callers
        # of this shim currently observe; confirm which behavior is wanted.
        warnings.warn("method `index` will be removed in the future", DeprecationWarning, stacklevel=2)
        return 0 if value in self else -1

    def insert(self, index: int, element: str) -> None:
        """Deprecated `list.insert` stand-in; `index` is ignored."""
        warnings.warn("method `insert` will be removed in the future", DeprecationWarning, stacklevel=2)
        self.add(element)

    def reverse(self) -> None:
        """Deprecated `list.reverse` stand-in; a no-op since a set is unordered."""
        warnings.warn("method `reverse` will be removed in the future", DeprecationWarning, stacklevel=2)

    def sort(self, /, *, key: Callable | None = None, reverse: bool = False) -> None:
        """Deprecated `list.sort` stand-in; a no-op since a set is unordered."""
        warnings.warn("method `sort` will be removed in the future", DeprecationWarning, stacklevel=2)
99+
100+
101+
BLOCK_LEVEL_ELEMENTS: set[str] = _BlockLevelElements({
48102
# Elements which are invalid to wrap in a `<p>` tag.
49103
# See https://w3c.github.io/html/grouping-content.html#the-p-element
50104
'address', 'article', 'aside', 'blockquote', 'details', 'div', 'dl',
@@ -56,9 +110,9 @@
56110
'math', 'map', 'noscript', 'output', 'object', 'option', 'progress', 'script',
57111
'style', 'summary', 'tbody', 'td', 'textarea', 'tfoot', 'th', 'thead', 'tr', 'video',
58112
'center'
59-
]
113+
})
60114
"""
61-
List of HTML tags which get treated as block-level elements. Same as the `block_level_elements`
115+
Set of HTML tags which get treated as block-level elements. Same as the `block_level_elements`
62116
attribute of the [`Markdown`][markdown.Markdown] class. Generally one should use the
63117
attribute on the class. This remains for compatibility with older extensions.
64118
"""

tests/test_apis.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -920,7 +920,7 @@ class TestBlockAppend(unittest.TestCase):
920920
def testBlockAppend(self):
921921
""" Test that appended escapes are only in the current instance. """
922922
md = markdown.Markdown()
923-
md.block_level_elements.append('test')
923+
md.block_level_elements.add('test')
924924
self.assertEqual('test' in md.block_level_elements, True)
925925
md2 = markdown.Markdown()
926926
self.assertEqual('test' not in md2.block_level_elements, True)

0 commit comments

Comments
 (0)