Skip to content

Commit a7fa529

Browse files
committed
Fix Alma finder to handle new vault HTML format
The top page of the AlmaLinux vault has changed from plain XML to a full HTML page. Update the Alma finder to parse the new format correctly. Drive-by fixes: add a CLI for the AlmaLinux finder; recompile requirements; allow redirections for HEAD requests when testing URLs. Issue: #68
1 parent 873fbb0 commit a7fa529

File tree

14 files changed

+888
-657
lines changed

14 files changed

+888
-657
lines changed

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ dependencies = [
3939
"requests-oauthlib>=1.3.0",
4040
"lxml>=4.6.3",
4141
"dogpile.cache>=1.1.5",
42+
"beautifulsoup4",
4243
]
4344

4445
[project.optional-dependencies]

requirements/requirements-bootstrap.txt

Lines changed: 293 additions & 249 deletions
Large diffs are not rendered by default.

requirements/requirements-cli.txt

Lines changed: 164 additions & 118 deletions
Large diffs are not rendered by default.

requirements/requirements-test.txt

Lines changed: 196 additions & 150 deletions
Large diffs are not rendered by default.

requirements/requirements.txt

Lines changed: 164 additions & 118 deletions
Large diffs are not rendered by default.

soufi/cli.py

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,29 @@ def centos(
9797
)
9898
return cls.find(centos_finder)
9999

100+
@classmethod
def almalinux(
    cls,
    name,
    version,
    repos=None,
    source_repos=None,
    binary_repos=None,
    timeout=None,
):
    """Build an AlmaLinux finder and run ``cls.find`` on it.

    The finder is created through ``finder.factory("almalinux", ...)`` as
    an OS-type source, using an in-memory dogpile cache backed by the
    module-level ``LRU_CACHE``.

    :param name: Package name, forwarded to the finder.
    :param version: Package version, forwarded to the finder.
    :param repos: Accepted but not forwarded to the finder.
    :param source_repos: Forwarded to the finder unchanged.
    :param binary_repos: Forwarded to the finder unchanged.
    :param timeout: Forwarded to the finder unchanged.
    :return: Whatever ``cls.find`` returns for the constructed finder.
    """
    # NOTE(review): `repos` is never used here even though the `--repo`
    # help text mentions Almalinux -- confirm whether the finder should
    # receive it.
    factory_kwargs = {
        "name": name,
        "version": version,
        "s_type": finder.SourceType.os,
        "source_repos": source_repos,
        "binary_repos": binary_repos,
        "cache_backend": "dogpile.cache.memory",
        "cache_args": {"cache_dict": LRU_CACHE},
        "timeout": timeout,
    }
    return cls.find(finder.factory("almalinux", **factory_kwargs))
122+
100123
@classmethod
101124
def alpine(cls, name, version, aports_dir, timeout=None):
102125
alpine_finder = finder.factory(
@@ -243,7 +266,7 @@ def make_archive_from_discovery_source(disc_src, fname):
243266
"--repo",
244267
default=(),
245268
multiple=True,
246-
help="For CentOS, name of repo to use instead of defaults. "
269+
help="For CentOS/Almalinux, name of repo to use instead of defaults. "
247270
"Use 'optimal' to use an extended optimal set. May be repeated.",
248271
)
249272
@click.option(

soufi/finder.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -246,7 +246,9 @@ def test_url(self, url, **kwargs):
246246

247247
def _head_url(self, url, **kwargs):
248248
def inner():
249-
response = requests.head(url, timeout=self.timeout, **kwargs)
249+
response = requests.head(
250+
url, timeout=self.timeout, allow_redirects=True, **kwargs
251+
)
250252
if response.status_code == requests.codes.not_allowed:
251253
# HEAD not available; we can try to download it instead and
252254
# abort before starting the stream.

soufi/finders/almalinux.py

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
import re
55

6-
from lxml import html
6+
from bs4 import BeautifulSoup
77

88
import soufi.finders.yum as yum_finder
99
from soufi import finder
@@ -25,21 +25,30 @@ class AlmaLinuxFinder(yum_finder.YumFinder):
2525

2626
distro = finder.Distro.almalinux.value
2727

28-
def _get_dirs(self):
29-
"""Get all the possible Vault dirs that could match."""
30-
content = self.get_url(VAULT).content
31-
tree = html.fromstring(content)
32-
# Ignore beta releases; we may want to make this a switchable behavior
33-
retval = tree.xpath("//a/text()[not(contains(.,'-beta'))]")
28+
def _get_vault_repo_versions(self):
    """Return the point-release directory names listed on the Vault page.

    Fetches ``VAULT`` with ``self.get_url``, parses the response text as
    HTML and collects every link whose ``href`` looks like
    ``./<major>.<minor>...``.  Links containing ``-beta`` are skipped.

    :return: A list of version strings (without the leading ``./`` or
        trailing ``/``), ordered newest first by numeric major/minor.
    """
    response = self.get_url(VAULT)
    soup = BeautifulSoup(response.text, "html.parser")

    # AlmaLinux Vault is fond of symlinking the current point release to a
    # directory with just the major version number, e.g., `6.10/`->`6/`.
    # This means that such directories are inherently unstable and their
    # contents are subject to change without notice, so we'll ignore
    # them in favour of the "full" names.
    # The leading dot is escaped so that only a literal "./" prefix
    # matches, rather than any single character followed by "/".
    href_pattern = re.compile(r"^\./\d+\.\d+")

    versions = []
    for link in soup.find_all("a", href=href_pattern):
        # Drop the trailing slash, then the two-character "./" prefix.
        version = link.get("href").rstrip("/")[2:]
        if "-beta" in version:
            # Ignore beta releases; we may want to make this a switchable
            # behaviour.
            continue
        versions.append(version)

    def release_key(version):
        # Compare major/minor as integers: a plain string sort would put
        # "8.9" ahead of "8.10" and "9.x" ahead of "10.x".  The raw string
        # is the tie-breaker, so entries sharing a major.minor keep the
        # relative order a string sort would give them.
        major, minor = re.match(r"(\d+)\.(\d+)", version).groups()
        return int(major), int(minor), version

    # Newest releases first, so they get searched first.
    return sorted(versions, key=release_key, reverse=True)
4046

41-
# Walk the tree backwards, so that newer releases get searched first
42-
return reversed(dirs)
47+
def _get_dirs(self):
    """Return every Vault directory name that could hold a match.

    The directory names are exactly the version strings produced by
    ``_get_vault_repo_versions``, so that result is handed back as-is.
    """
    return self._get_vault_repo_versions()
4352

4453
def get_source_repos(self):
4554
"""Determine which source search paths are valid.

soufi/finders/java.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,8 @@ def get_source_url(self):
4444
f'{self.name}-{self.version}-sources.jar'
4545
)
4646
found = self.test_url(
47-
MAVEN_REPO_URL, params=params, allow_redirects=True
47+
MAVEN_REPO_URL,
48+
params=params,
4849
)
4950
if not found:
5051
raise exceptions.SourceNotFound

soufi/testing/base.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,17 +94,23 @@ def extend_side_effects(self, mock_obj, extra_value):
9494
effects.append(extra_value)
9595
mock_obj.side_effect = effects
9696

97-
def patch_get_with_response(self, response_code, data=None, json=None):
97+
def patch_get_with_response(
    self, response_code, data=None, json=None, as_text=False
):
    """Replace `requests.get` with a mock that yields a canned response.

    :param response_code: A requests.codes value, to mimic an HTTP status
    :param data: A string-like object used as the response body
    :param json: A dict or list, to mimic what Response.json() would return
    :param as_text: If True, `data` is stored on Response.text; otherwise
        it is stored on Response.content
    :return: The result of `self.patch` for `requests.get` (described by
        the original docstring as the created MagicMock, to add
        side-effects, etc.)
    """
    mock_get = mock.MagicMock()
    canned = mock_get.return_value
    canned.status_code = response_code
    # Only one body attribute is populated explicitly; the other is left
    # as whatever MagicMock auto-creates on access.
    body_attr = "text" if as_text else "content"
    setattr(canned, body_attr, data)
    canned.json.return_value = json
    return self.patch(requests, 'get', mock_get)
110116

0 commit comments

Comments
 (0)