Skip to content

Commit 909be0d

Browse files
authored
Merge pull request #11259 from sbidoul/drop-html5lib
Drop html5lib
2 parents 6d02fe2 + d3a318f commit 909be0d

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

53 files changed

+22 / -13420 lines changed

news/10825.removal.rst

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
Remove the ``html5lib`` deprecated feature flag.

news/html5lib.vendor.rst

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
Remove vendored html5lib.

src/pip/_internal/cli/cmdoptions.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1013,7 +1013,6 @@ def check_list_path_option(options: Values) -> None:
10131013
default=[],
10141014
choices=[
10151015
"legacy-resolver",
1016-
"html5lib",
10171016
],
10181017
help=("Enable deprecated functionality, that will be removed in the future."),
10191018
)

src/pip/_internal/cli/req_command.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -499,5 +499,4 @@ def _build_package_finder(
499499
link_collector=link_collector,
500500
selection_prefs=selection_prefs,
501501
target_python=target_python,
502-
use_deprecated_html5lib="html5lib" in options.deprecated_features_enabled,
503502
)

src/pip/_internal/commands/index.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -97,7 +97,6 @@ def _build_package_finder(
9797
link_collector=link_collector,
9898
selection_prefs=selection_prefs,
9999
target_python=target_python,
100-
use_deprecated_html5lib="html5lib" in options.deprecated_features_enabled,
101100
)
102101

103102
def get_available_package_versions(self, options: Values, args: List[Any]) -> None:

src/pip/_internal/commands/list.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -149,7 +149,6 @@ def _build_package_finder(
149149
return PackageFinder.create(
150150
link_collector=link_collector,
151151
selection_prefs=selection_prefs,
152-
use_deprecated_html5lib="html5lib" in options.deprecated_features_enabled,
153152
)
154153

155154
def run(self, options: Values, args: List[str]) -> int:

src/pip/_internal/index/collector.py

Lines changed: 8 additions & 64 deletions
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,7 @@
2929
Union,
3030
)
3131

32-
from pip._vendor import html5lib, requests
32+
from pip._vendor import requests
3333
from pip._vendor.requests import Response
3434
from pip._vendor.requests.exceptions import RetryError, SSLError
3535

@@ -191,27 +191,6 @@ def _get_encoding_from_headers(headers: ResponseHeaders) -> Optional[str]:
191191
return None
192192

193193

194-
def _determine_base_url(document: HTMLElement, page_url: str) -> str:
195-
"""Determine the HTML document's base URL.
196-
197-
This looks for a ``<base>`` tag in the HTML document. If present, its href
198-
attribute denotes the base URL of anchor tags in the document. If there is
199-
no such tag (or if it does not have a valid href attribute), the HTML
200-
file's URL is used as the base URL.
201-
202-
:param document: An HTML document representation. The current
203-
implementation expects the result of ``html5lib.parse()``.
204-
:param page_url: The URL of the HTML document.
205-
206-
TODO: Remove when `html5lib` is dropped.
207-
"""
208-
for base in document.findall(".//base"):
209-
href = base.get("href")
210-
if href is not None:
211-
return href
212-
return page_url
213-
214-
215194
def _clean_url_path_part(part: str) -> str:
216195
"""
217196
Clean a "part" of a URL path (i.e. after splitting on "@" characters).
@@ -313,9 +292,7 @@ def __hash__(self) -> int:
313292

314293

315294
class ParseLinks(Protocol):
316-
def __call__(
317-
self, page: "IndexContent", use_deprecated_html5lib: bool
318-
) -> Iterable[Link]:
295+
def __call__(self, page: "IndexContent") -> Iterable[Link]:
319296
...
320297

321298

@@ -327,49 +304,20 @@ def with_cached_index_content(fn: ParseLinks) -> ParseLinks:
327304
"""
328305

329306
@functools.lru_cache(maxsize=None)
330-
def wrapper(
331-
cacheable_page: CacheablePageContent, use_deprecated_html5lib: bool
332-
) -> List[Link]:
333-
return list(fn(cacheable_page.page, use_deprecated_html5lib))
307+
def wrapper(cacheable_page: CacheablePageContent) -> List[Link]:
308+
return list(fn(cacheable_page.page))
334309

335310
@functools.wraps(fn)
336-
def wrapper_wrapper(
337-
page: "IndexContent", use_deprecated_html5lib: bool
338-
) -> List[Link]:
311+
def wrapper_wrapper(page: "IndexContent") -> List[Link]:
339312
if page.cache_link_parsing:
340-
return wrapper(CacheablePageContent(page), use_deprecated_html5lib)
341-
return list(fn(page, use_deprecated_html5lib))
313+
return wrapper(CacheablePageContent(page))
314+
return list(fn(page))
342315

343316
return wrapper_wrapper
344317

345318

346-
def _parse_links_html5lib(page: "IndexContent") -> Iterable[Link]:
347-
"""
348-
Parse an HTML document, and yield its anchor elements as Link objects.
349-
350-
TODO: Remove when `html5lib` is dropped.
351-
"""
352-
document = html5lib.parse(
353-
page.content,
354-
transport_encoding=page.encoding,
355-
namespaceHTMLElements=False,
356-
)
357-
358-
url = page.url
359-
base_url = _determine_base_url(document, url)
360-
for anchor in document.findall(".//a"):
361-
link = _create_link_from_element(
362-
anchor.attrib,
363-
page_url=url,
364-
base_url=base_url,
365-
)
366-
if link is None:
367-
continue
368-
yield link
369-
370-
371319
@with_cached_index_content
372-
def parse_links(page: "IndexContent", use_deprecated_html5lib: bool) -> Iterable[Link]:
320+
def parse_links(page: "IndexContent") -> Iterable[Link]:
373321
"""
374322
Parse a Simple API's Index Content, and yield its anchor elements as Link objects.
375323
"""
@@ -398,10 +346,6 @@ def parse_links(page: "IndexContent", use_deprecated_html5lib: bool) -> Iterable
398346
hashes=file.get("hashes", {}),
399347
)
400348

401-
if use_deprecated_html5lib:
402-
yield from _parse_links_html5lib(page)
403-
return
404-
405349
parser = HTMLLinkParser(page.url)
406350
encoding = page.encoding or "utf-8"
407351
parser.feed(page.content.decode(encoding))

src/pip/_internal/index/package_finder.py

Lines changed: 1 addition & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -598,7 +598,6 @@ def __init__(
598598
link_collector: LinkCollector,
599599
target_python: TargetPython,
600600
allow_yanked: bool,
601-
use_deprecated_html5lib: bool,
602601
format_control: Optional[FormatControl] = None,
603602
candidate_prefs: Optional[CandidatePreferences] = None,
604603
ignore_requires_python: Optional[bool] = None,
@@ -623,7 +622,6 @@ def __init__(
623622
self._ignore_requires_python = ignore_requires_python
624623
self._link_collector = link_collector
625624
self._target_python = target_python
626-
self._use_deprecated_html5lib = use_deprecated_html5lib
627625

628626
self.format_control = format_control
629627

@@ -640,8 +638,6 @@ def create(
640638
link_collector: LinkCollector,
641639
selection_prefs: SelectionPreferences,
642640
target_python: Optional[TargetPython] = None,
643-
*,
644-
use_deprecated_html5lib: bool,
645641
) -> "PackageFinder":
646642
"""Create a PackageFinder.
647643
@@ -666,7 +662,6 @@ def create(
666662
allow_yanked=selection_prefs.allow_yanked,
667663
format_control=selection_prefs.format_control,
668664
ignore_requires_python=selection_prefs.ignore_requires_python,
669-
use_deprecated_html5lib=use_deprecated_html5lib,
670665
)
671666

672667
@property
@@ -796,7 +791,7 @@ def process_project_url(
796791
if index_response is None:
797792
return []
798793

799-
page_links = list(parse_links(index_response, self._use_deprecated_html5lib))
794+
page_links = list(parse_links(index_response))
800795

801796
with indent_log():
802797
package_links = self.evaluate_links(

src/pip/_internal/self_outdated_check.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -173,7 +173,6 @@ def _get_current_remote_pip_version(
173173
finder = PackageFinder.create(
174174
link_collector=link_collector,
175175
selection_prefs=selection_prefs,
176-
use_deprecated_html5lib=("html5lib" in options.deprecated_features_enabled),
177176
)
178177
best_candidate = finder.find_best_candidate("pip").best_candidate
179178
if best_candidate is None:

src/pip/_vendor/README.rst

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -104,9 +104,6 @@ Modifications
104104
rather than ``appdirs``.
105105
* ``packaging`` has been modified to import its dependencies from
106106
``pip._vendor``.
107-
* ``html5lib`` has been modified to import six from ``pip._vendor``, to prefer
108-
importing from ``collections.abc`` instead of ``collections`` and does not
109-
import ``xml.etree.cElementTree`` on Python 3.
110107
* ``CacheControl`` has been modified to import its dependencies from
111108
``pip._vendor``.
112109
* ``requests`` has been modified to import its other dependencies from

0 commit comments

Comments (0)