diff --git a/recipe/meta.yaml b/recipe/meta.yaml index d35379005..d87c7d2e2 100644 --- a/recipe/meta.yaml +++ b/recipe/meta.yaml @@ -4,7 +4,7 @@ {% set ver2 = '.'.join(version.split('.')[0:2]) %} {% set ver2nd = ''.join(version.split('.')[0:2]) %} {% set ver3nd = ''.join(version.split('.')[0:3]) %} -{% set build_number = 0 %} +{% set build_number = 1 %} # this makes the linter happy {% set channel_targets = channel_targets or 'conda-forge main' %} diff --git a/recipe/patches/0027-gh-135462-Fix-quadratic-complexity-in-processing-spe.patch b/recipe/patches/0027-gh-135462-Fix-quadratic-complexity-in-processing-spe.patch new file mode 100644 index 000000000..6f807df0d --- /dev/null +++ b/recipe/patches/0027-gh-135462-Fix-quadratic-complexity-in-processing-spe.patch @@ -0,0 +1,239 @@ +From 8da82ac397729a33bf005a6000458b4f832fc5ca Mon Sep 17 00:00:00 2001 +From: Serhiy Storchaka +Date: Fri, 13 Jun 2025 19:57:48 +0300 +Subject: [PATCH 27/27] gh-135462: Fix quadratic complexity in processing + special input in HTMLParser (GH-135464) + +End-of-file errors are now handled according to the HTML5 specs -- +comments and declarations are automatically closed, tags are ignored. +(cherry picked from commit 6eb6c5dbfb528bd07d77b60fd71fd05d81d45c41) + +Co-authored-by: Serhiy Storchaka +--- + Lib/html/parser.py | 41 +++++--- + Lib/test/test_htmlparser.py | 95 ++++++++++++++++--- + ...-06-13-15-55-22.gh-issue-135462.KBeJpc.rst | 4 + + 3 files changed, 117 insertions(+), 23 deletions(-) + create mode 100644 Misc/NEWS.d/next/Security/2025-06-13-15-55-22.gh-issue-135462.KBeJpc.rst + +diff --git a/Lib/html/parser.py b/Lib/html/parser.py +index 58f6bb3b1e9..94f4aaecfc6 100644 +--- a/Lib/html/parser.py ++++ b/Lib/html/parser.py +@@ -25,6 +25,7 @@ + charref = re.compile('&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]') + + starttagopen = re.compile('<[a-zA-Z]') ++endtagopen = re.compile('') + commentclose = re.compile(r'--\s*>') + # Note: +@@ -176,7 +177,7 @@ def goahead(self, end): + k = self.parse_pi(i) + elif startswith("', i + 1) +- if k < 0: +- k = rawdata.find('<', i + 1) +- if k < 0: +- k = i + 1 +- else: +- k += 1 +- if self.convert_charrefs and not self.cdata_elem: +- self.handle_data(unescape(rawdata[i:k])) ++ if starttagopen.match(rawdata, i): # < + letter ++ pass ++ elif startswith("'), +- ('comment', '/img'), +- ('endtag', 'html<')]) ++ ('data', '\n')]) + + def test_starttag_junk_chars(self): ++ self._run_check("<", [('data', '<')]) ++ self._run_check("<>", [('data', '<>')]) ++ self._run_check("< >", [('data', '< >')]) ++ self._run_check("< ", [('data', '< ')]) + self._run_check("", []) ++ self._run_check("<$>", [('data', '<$>')]) + self._run_check("", [('comment', '$')]) + self._run_check("", [('endtag', 'a')]) ++ self._run_check("", [('starttag', 'a", [('endtag', 'a'", [('data', "'", []) ++ self._run_check("", [('starttag', 'a$b', [])]) + self._run_check("", [('startendtag', 'a$b', [])]) + self._run_check("", [('starttag', 'a$b', [])]) + self._run_check("", [('startendtag', 'a$b', [])]) ++ self._run_check("", [('endtag', 'a$b')]) + + def test_slashes_in_starttag(self): + self._run_check('', [('startendtag', 'a', [('foo', 'var')])]) +@@ -537,13 +545,56 @@ def test_EOF_in_charref(self): + for html, expected in data: + self._run_check(html, expected) + +- def test_broken_comments(self): +- html = ('' ++ def test_eof_in_comments(self): ++ data = [ ++ ('', [('comment', '-!>')]), ++ ('' + '' + '' + '') + expected = [ ++ ('comment', 'ELEMENT br EMPTY'), + ('comment', ' not really a comment '), + ('comment', ' not a comment either --'), + ('comment', ' -- close enough --'), +@@ -598,6 +649,26 @@ def test_convert_charrefs_dropped_text(self): + ('endtag', 'a'), ('data', ' bar & baz')] + ) + ++ @support.requires_resource('cpu') ++ def test_eof_no_quadratic_complexity(self): ++ # Each of these examples used to take about an hour. ++ # Now they take a fraction of a second. ++ def check(source): ++ parser = html.parser.HTMLParser() ++ parser.feed(source) ++ parser.close() ++ n = 120_000 ++ check("