|
54 | 54 | import tempfile |
55 | 55 | import time |
56 | 56 | import zlib |
57 | | -from xml.etree import ElementTree |
58 | 57 |
|
59 | 58 | from easybuild.base import fancylogger |
60 | 59 | from easybuild.tools import run |
61 | 60 | # import of build_log must stay, to enable use of EasyBuildLog |
62 | 61 | from easybuild.tools.build_log import EasyBuildError, dry_run_msg, print_msg, print_warning |
63 | 62 | from easybuild.tools.config import DEFAULT_WAIT_ON_LOCK_INTERVAL, GENERIC_EASYBLOCK_PKG, build_option, install_path |
64 | | -from easybuild.tools.py2vs3 import std_urllib, string_type |
| 63 | +from easybuild.tools.py2vs3 import HTMLParser, std_urllib, string_type |
65 | 64 | from easybuild.tools.utilities import nub, remove_unwanted_chars |
66 | 65 |
|
67 | 66 | try: |
@@ -519,15 +518,21 @@ def pypi_source_urls(pkg_name): |
519 | 518 | else: |
520 | 519 | urls_txt = read_file(urls_html) |
521 | 520 |
|
522 | | - # ignore yanked releases (see https://pypi.org/help/#yanked) |
523 | | - # see https://github.com/easybuilders/easybuild-framework/issues/3301 |
524 | | - urls_txt = re.sub(r'<a.*?data-yanked.*?</a>', '', urls_txt) |
| 521 | + res = [] |
525 | 522 |
|
526 | | - parsed_html = ElementTree.ElementTree(ElementTree.fromstring(urls_txt)) |
527 | | - if hasattr(parsed_html, 'iter'): |
528 | | - res = [a.attrib['href'] for a in parsed_html.iter('a')] |
529 | | - else: |
530 | | - res = [a.attrib['href'] for a in parsed_html.getiterator('a')] |
| 523 | + # note: don't use xml.etree.ElementTree to parse the HTML page served by PyPI's simple API |
| 524 | + # see https://github.com/pypa/warehouse/issues/7886 |
| 525 | + class HrefHTMLParser(HTMLParser): |
| 526 | + """HTML parser to extract 'href' attribute values from anchor tags (<a href='...'>).""" |
| 527 | + |
| 528 | + def handle_starttag(self, tag, attrs): |
| 529 | + if tag == 'a': |
| 530 | + attrs = dict(attrs) |
| 531 | + if 'href' in attrs: |
| 532 | + res.append(attrs['href']) |
| 533 | + |
| 534 | + parser = HrefHTMLParser() |
| 535 | + parser.feed(urls_txt) |
531 | 536 |
|
532 | 537 | # links are relative, transform them into full URLs; for example: |
533 | 538 | # from: ../../packages/<dir1>/<dir2>/<hash>/easybuild-<version>.tar.gz#md5=<md5> |
|
0 commit comments