Skip to content

Commit b65bc85

Browse files
authored
feat: upgrade the CommonMark spec to 0.31.2 (#198)
1 parent ca850b2 commit b65bc85

File tree

12 files changed

+259
-280
lines changed

12 files changed

+259
-280
lines changed

CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,10 @@
1+
## v2.1.2(2024-06-21)
2+
3+
### Changed
4+
5+
- Update the GFM spec to the latest master branch.
6+
- Update the CommonMark spec to 0.31.2.
7+
18
## v2.1.1(2024-06-19)
29

310
### Fixed

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,13 @@
55
[![PyPI](https://img.shields.io/pypi/v/marko.svg?logo=python&logoColor=white)](https://pypi.org/project/marko/)
66
[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/marko.svg?logo=python&logoColor=white)](https://pypi.org/project/marko/)
77
[![Documentation Status](https://img.shields.io/readthedocs/marko-py.svg?logo=readthedocs)](https://marko-py.readthedocs.io/en/latest/?badge=latest)
8-
[![CommonMark Spec](https://img.shields.io/badge/CommonMark-0.30-blue.svg)][spec]
8+
[![CommonMark Spec](https://img.shields.io/badge/CommonMark-0.31.2-blue.svg)][spec]
99

1010
![Build Status](https://github.com/frostming/marko/workflows/Tests/badge.svg)
1111
[![codecov](https://codecov.io/gh/frostming/marko/branch/master/graph/badge.svg)](https://codecov.io/gh/frostming/marko)
1212
[![Codacy Badge](https://api.codacy.com/project/badge/Grade/b785f5b3fa7c4d93a02372d31b3f73b1)](https://www.codacy.com/app/frostming/marko?utm_source=github.com&utm_medium=referral&utm_content=frostming/marko&utm_campaign=Badge_Grade)
1313

14-
Marko is a pure Python markdown parser that adheres to the specifications of [CommonMark's spec v0.30][spec]. It has been designed with high extensibility in mind, as detailed in the [Extensions](#extensions) section.
14+
Marko is a pure Python markdown parser that adheres to the specifications of [CommonMark's spec v0.31.2][spec]. It has been designed with high extensibility in mind, as detailed in the [Extensions](#extensions) section.
1515

1616
Marko requires Python 3.8 or higher.
1717

@@ -21,7 +21,7 @@ Of all the Python markdown parsers available, a common issue is the difficulty f
2121

2222
Marko's compliance with the complex CommonMark specification can impact its performance. However, using a parser that does not adhere to this spec may result in unexpected rendering outcomes. According to benchmark results, Marko is three times slower than Python-Markdown but slightly faster than Commonmark-py and significantly slower than mistune. If prioritizing performance over spec compliance is crucial for you, it would be best to opt for another parser.
2323

24-
[spec]: https://spec.commonmark.org/0.30/
24+
[spec]: https://spec.commonmark.org/0.31.2/
2525
[pymd]: https://github.com/waylan/Python-Markdown
2626
[mistune]: https://github.com/lepture/mistune
2727
[cmpy]: https://github.com/rtfd/CommonMark-py

marko/ext/gfm/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323
GFM = MarkoExtension(
2424
elements=[
2525
elements.Paragraph,
26-
elements.InlineHTML,
2726
elements.Strikethrough,
2827
elements.Url,
2928
elements.Table,

marko/ext/gfm/elements.py

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
import re
99
from typing import Any, cast
1010

11-
from marko import block, inline, patterns
11+
from marko import block, inline
1212
from marko.source import Source
1313

1414

@@ -24,18 +24,6 @@ def __init__(self, lines):
2424
self.inline_body = self.inline_body[m.end(1) :]
2525

2626

27-
class InlineHTML(inline.InlineHTML):
28-
pattern = re.compile(
29-
r"(<%s(?:%s)* */?>" # open tag
30-
r"|</%s *>" # closing tag
31-
r"|<!--(?:>|->|[\s\S]*?-->)" # HTML comment
32-
r"|<\?[\s\S]*?\?>" # processing instruction
33-
r"|<![A-Z]+ +[\s\S]*?>" # declaration
34-
r"|<!\[CDATA\[[\s\S]*?\]\]>)" # CDATA section
35-
% (patterns.tag_name, patterns.attribute, patterns.tag_name)
36-
)
37-
38-
3927
class Strikethrough(inline.InlineElement):
4028
pattern = re.compile(r"(?<!~)(~|~~)([^~]+)\1(?!~)")
4129
priority = 5

marko/inline.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ class InlineHTML(InlineElement):
9292
pattern = re.compile(
9393
r"(<%s(?:%s)* */?>" # open tag
9494
r"|</%s *>" # closing tag
95-
r"|<!--(?!>|->|[\s\S]*?--[\s\S]*?-->)[\s\S]*?(?<!-)-->" # HTML comment
95+
r"|<!--(?:>|->|[\s\S]*?-->)" # HTML comment
9696
r"|<\?[\s\S]*?\?>" # processing instruction
9797
r"|<![A-Z]+ +[\s\S]*?>" # declaration
9898
r"|<!\[CDATA\[[\s\S]*?\]\]>)" # CDATA section

marko/inline_parser.py

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -515,16 +515,12 @@ def is_right_flanking(self) -> bool:
515515
)
516516

517517
def followed_by_punc(self) -> bool:
518-
return (
519-
self.end < len(self.text)
520-
and patterns.punctuation.match(self.text, self.end) is not None
518+
return self.end < len(self.text) and patterns.is_punctuation(
519+
self.text[self.end]
521520
)
522521

523522
def preceded_by_punc(self) -> bool:
524-
return (
525-
self.start > 0
526-
and patterns.punctuation.match(self.text[self.start - 1]) is not None
527-
)
523+
return self.start > 0 and patterns.is_punctuation(self.text[self.start - 1])
528524

529525
def closed_by(self, other: Delimiter) -> bool:
530526
return not (

marko/patterns.py

Lines changed: 11 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,10 @@
22
Some regex patterns
33
"""
44

5+
import functools
56
import re
7+
import string
8+
import unicodedata
69

710
tags = [
811
"address",
@@ -86,30 +89,11 @@
8689
r"(?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9]"
8790
r"(?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*"
8891
)
89-
punctuation = re.compile(
90-
r'[!"#$%&\'()*+,\-./:;<=>?@\[\]\\^_`{|}~\xA1\xA7\xAB\xB6\xB7\xBB'
91-
r"\xBF\u037E\u0387\u055A-\u055F\u0589\u058A\u05BE\u05C0\u05C3"
92-
r"\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E\u061F"
93-
r"\u066A-\u066D\u06D4\u0700-\u070D\u07F7-\u07F9\u0830-\u083E"
94-
r"\u085E\u0964\u0965\u0970\u0AF0\u0DF4\u0E4F\u0E5A\u0E5B\u0F04-\u0F12"
95-
r"\u0F14\u0F3A-\u0F3D\u0F85\u0FD0-\u0FD4\u0FD9\u0FDA\u104A-\u104F\u10FB"
96-
r"\u1360-\u1368\u1400\u166D\u166E\u169B\u169C\u16EB-\u16ED\u1735\u1736"
97-
r"\u17D4-\u17D6\u17D8-\u17DA\u1800-\u180A\u1944\u1945\u1A1E\u1A1F\u1AA0-"
98-
r"\u1AA6\u1AA8-\u1AAD\u1B5A-\u1B60\u1BFC-\u1BFF\u1C3B-\u1C3F\u1C7E\u1C7F"
99-
r"\u1CC0-\u1CC7\u1CD3\u2010-\u2027\u2030-\u2043\u2045-\u2051\u2053-\u205E"
100-
r"\u207D\u207E\u208D\u208E\u2308-\u230B\u2329\u232A\u2768-\u2775\u27C5"
101-
r"\u27C6\u27E6-\u27EF\u2983-\u2998\u29D8-\u29DB\u29FC\u29FD\u2CF9-\u2CFC"
102-
r"\u2CFE\u2CFF\u2D70\u2E00-\u2E2E\u2E30-\u2E42\u3001-\u3003\u3008-\u3011"
103-
r"\u3014-\u301F\u3030\u303D\u30A0\u30FB\uA4FE\uA4FF\uA60D-\uA60F\uA673"
104-
r"\uA67E\uA6F2-\uA6F7\uA874-\uA877\uA8CE\uA8CF\uA8F8-\uA8FA\uA8FC\uA92E"
105-
r"\uA92F\uA95F\uA9C1-\uA9CD\uA9DE\uA9DF\uAA5C-\uAA5F\uAADE\uAADF\uAAF0"
106-
r"\uAAF1\uABEB\uFD3E\uFD3F\uFE10-\uFE19\uFE30-\uFE52\uFE54-\uFE61\uFE63"
107-
r"\uFE68\uFE6A\uFE6B\uFF01-\uFF03\uFF05-\uFF0A\uFF0C-\uFF0F\uFF1A\uFF1B"
108-
r"\uFF1F\uFF20\uFF3B-\uFF3D\uFF3F\uFF5B\uFF5D\uFF5F-\uFF65]|\uD800[\uDD00-"
109-
r"\uDD02\uDF9F\uDFD0]|\uD801\uDD6F|\uD802[\uDC57\uDD1F\uDD3F\uDE50-\uDE58"
110-
r"\uDE7F\uDEF0-\uDEF6\uDF39-\uDF3F\uDF99-\uDF9C]|\uD804[\uDC47-\uDC4D"
111-
r"\uDCBB\uDCBC\uDCBE-\uDCC1\uDD40-\uDD43\uDD74\uDD75\uDDC5-\uDDC9\uDDCD"
112-
r"\uDDDB\uDDDD-\uDDDF\uDE38-\uDE3D\uDEA9]|\uD805[\uDCC6\uDDC1-\uDDD7"
113-
r"\uDE41-\uDE43\uDF3C-\uDF3E]|\uD809[\uDC70-\uDC74]|\uD81A[\uDE6E\uDE6F"
114-
r"\uDEF5\uDF37-\uDF3B\uDF44]|\uD82F\uDC9F|\uD836[\uDE87-\uDE8B]"
115-
)
92+
93+
94+
@functools.lru_cache(maxsize=128)
95+
def is_punctuation(ch: str) -> bool:
96+
if ch in string.punctuation:
97+
return True
98+
category = unicodedata.category(ch)
99+
return category.startswith("P") or category.startswith("S")

tests/__init__.py

Lines changed: 0 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -1,63 +0,0 @@
1-
# -*- coding: utf-8 -*-
2-
import codecs
3-
import os
4-
import re
5-
6-
from tests.normalize import normalize_html
7-
8-
TEST_ROOT = os.path.dirname(__file__)
9-
EXAMPLE_PATTERN = re.compile(
10-
r"^`{32} example\b.*?\n([\s\S]*?)^\.\n([\s\S]*?)^`{32}$|^#{1,6} *(.*)$",
11-
flags=re.M,
12-
)
13-
14-
15-
def parse_examples(text):
16-
data = EXAMPLE_PATTERN.findall(text)
17-
18-
section = None
19-
count = 0
20-
for md, html, title in data:
21-
if title:
22-
count = 0
23-
section = title.lower().split("(")[0].replace(" ", "_")
24-
25-
if md and html:
26-
count += 1
27-
name = "%s_%03d" % (section, count)
28-
md = md.replace("→", "\t")
29-
html = html.replace("→", "\t")
30-
yield name, md, html
31-
32-
33-
class SpecTestSuite:
34-
@classmethod
35-
def load_spec(cls, spec_name):
36-
def attach_case(n, md, html):
37-
def method(self):
38-
self.assert_case(md, html)
39-
40-
name = "test_{}".format(n)
41-
method.__name__ = name
42-
method.__doc__ = "Run spec {} - {}".format(spec_name, n)
43-
setattr(cls, name, method)
44-
45-
spec_file = os.path.join(TEST_ROOT, "spec/{}.txt".format(spec_name))
46-
with codecs.open(spec_file, encoding="utf-8") as f:
47-
for name, md, html in parse_examples(f.read()):
48-
if not cls.ignore_case(name):
49-
attach_case(name, md, html)
50-
51-
@classmethod
52-
def ignore_case(cls, n):
53-
return False
54-
55-
def assert_case(self, text, html):
56-
result = self.markdown(text)
57-
assert normalize_html(result) == normalize_html(html), repr(result)
58-
59-
# Extra cases that are not included
60-
def test_mixed_tab_space_in_list_item(self):
61-
text = "* foo\n\t* foo.bar"
62-
html = "<ul><li>foo<ul><li>foo.bar</li></ul></li></ul>"
63-
self.assert_case(text, html)

0 commit comments

Comments
 (0)