Skip to content

Commit ad25c4b

Browse files
authored
Merge pull request #2008 from apache/tristan/prefer-better-version-matches
Prefer better version matches
2 parents 0a72487 + 15e9f77 commit ad25c4b

File tree

6 files changed

+94
-30
lines changed

6 files changed

+94
-30
lines changed

src/buildstream/_frontend/cli.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -409,7 +409,7 @@ def help_command(ctx, command):
409409
@click.option(
410410
"--min-version",
411411
type=click.STRING,
412-
default="2.4",
412+
default="2.5",
413413
show_default=True,
414414
help="The required format version",
415415
)

src/buildstream/downloadablefilesource.py

Lines changed: 10 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -43,12 +43,9 @@
4343
version string from the specified URI, in order to fill out the reported
4444
:attr:`~buildstream.source.SourceInfo.version_guess`.
4545
46-
The URI will be *searched* using this regular expression, and is allowed to
47-
yield a number of *groups*. For example the value ``(\\d+)_(\\d+)_(\\d+)`` would
48-
report 3 *groups* if 3 numerical values separated by underscores were found in
49-
the URI.
50-
51-
The default value for ``version-guess-pattern`` is ``\\d+\\.\\d+(?:\\.\\d+)?``.
46+
This is done using the :func:`utils.guess_version() <buildstream.utils.guess_version>`
47+
utility function; please refer to that function's documentation to understand how
48+
the guessing mechanism works, and what kind of string you should provide here.
5249
5350
.. note::
5451
@@ -140,7 +137,7 @@ def translate_url(
140137
for which it reports the sha256 checksum of the remote file content as the *version*.
141138
142139
An attempt to guess the version based on the remote filename will be made
143-
for the reporting of the *guess_version*. Control over how the guess is made
140+
for the reporting of the *version_guess*. Control over how the guess is made
144141
or overridden is explained above in the
145142
:ref:`built-in functionality documentation <core_downloadable_source_builtins>`.
146143
"""
@@ -268,7 +265,6 @@ class DownloadableFileSource(Source):
268265
COMMON_CONFIG_KEYS = Source.COMMON_CONFIG_KEYS + ["url", "ref", "version-guess-pattern", "version"]
269266

270267
__default_mirror_file = None
271-
__default_guess_pattern = re.compile(r"\d+\.\d+(?:\.\d+)?")
272268

273269
def configure(self, node):
274270
self.original_url = node.get_str("url")
@@ -281,9 +277,8 @@ def configure(self, node):
281277
self._mirror_dir = os.path.join(self.get_mirror_directory(), utils.url_directory_name(self.original_url))
282278

283279
self._guess_pattern_string = node.get_str("version-guess-pattern", None)
284-
if self._guess_pattern_string is None:
285-
self._guess_pattern = self.__default_guess_pattern
286-
else:
280+
self._guess_pattern = None
281+
if self._guess_pattern_string is not None:
287282
self._guess_pattern = re.compile(self._guess_pattern_string)
288283

289284
self._version = node.get_str("version", None)
@@ -298,7 +293,7 @@ def get_unique_key(self):
298293
# attributes which affect SourceInfo generation.
299294
if self._version is not None:
300295
unique_key.append(self._version)
301-
elif self._guess_pattern is not self.__default_guess_pattern:
296+
elif self._guess_pattern_string is not None:
302297
unique_key.append(self._guess_pattern_string)
303298

304299
return unique_key
@@ -352,16 +347,9 @@ def fetch(self): # pylint: disable=arguments-differ
352347
)
353348

354349
def collect_source_info(self):
355-
if self._version is None:
356-
version_match = self._guess_pattern.search(self.original_url)
357-
if not version_match:
358-
version_guess = None
359-
elif self._guess_pattern.groups == 0:
360-
version_guess = version_match.group(0)
361-
else:
362-
version_guess = ".".join(version_match.groups())
363-
else:
364-
version_guess = self._version
350+
version_guess = self._version
351+
if version_guess is None:
352+
version_guess = utils.guess_version(self.original_url, pattern=self._guess_pattern)
365353

366354
return [
367355
self.create_source_info(

src/buildstream/source.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1089,7 +1089,7 @@ def collect_source_info(self) -> Iterable[SourceInfo]:
10891089
.. note::
10901090
10911091
If your plugin uses :class:`.SourceFetcher` objects, you can implement
1092-
:func:`Source.collect_source_info() <buildstream.source.SourceFetcher.get_source_info>` instead.
1092+
:func:`SourceFetcher.get_source_info() <buildstream.source.SourceFetcher.get_source_info>` instead.
10931093
10941094
*Since: 2.5*
10951095
"""

src/buildstream/utils.py

Lines changed: 80 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535
import itertools
3636
from contextlib import contextmanager
3737
from pathlib import Path
38-
from typing import Callable, IO, Iterable, Iterator, Optional, Tuple, Union
38+
from typing import Callable, IO, Iterable, Iterator, Optional, Tuple, Union, Pattern
3939
from google.protobuf import timestamp_pb2
4040

4141
import psutil
@@ -66,6 +66,10 @@
6666
# it might not work
6767
_USE_CP_FILE_RANGE = hasattr(os, "copy_file_range")
6868

69+
# The default version guessing pattern for utils.guess_version()
70+
#
71+
_DEFAULT_GUESS_PATTERN = re.compile(r"(\d+)\.(\d+)(?:\.(\d+))?")
72+
6973

7074
class UtilError(BstError):
7175
"""Raised by utility functions when system calls fail.
@@ -697,15 +701,87 @@ def cleanup_tempfile():
697701

698702
# get_umask():
699703
#
700-
# Get the process's file mode creation mask without changing it.
704+
#
701705
#
702706
# Returns:
703-
# (int) The process's file mode creation mask.
707+
# (int)
704708
#
705-
def get_umask():
709+
def get_umask() -> int:
710+
"""
711+
Get the process's file mode creation mask without changing it.
712+
713+
Returns: The process's file mode creation mask.
714+
"""
706715
return _UMASK
707716

708717

718+
def guess_version(string: str, *, pattern: Optional[Pattern[str]] = None) -> Optional[str]:
    """
    Attempt to extract a version from an arbitrary string.

    This function is used by source plugins implementing
    :func:`Source.collect_source_info() <buildstream.source.Source.collect_source_info>` or
    :func:`SourceFetcher.get_source_info() <buildstream.source.SourceFetcher.get_source_info>`
    in order to provide a guess at what the version is, given some domain specific
    knowledge such as a git tag or a tarball URL.

    This function will traverse the provided string for non-overlapping matches, and
    in the case of *optional groups* being specified in the pattern, the match with the
    greatest number of matched groups will be preferred, allowing for correct handling
    of cases like: ``https://example.com/releases/1.2/release-1.2.3.tgz`` which may
    match the *pattern* multiple times. When several matches capture the same number
    of groups, the first one found in the string is used.

    The resulting version will be the captured groups, separated by ``.`` characters.
    If the pattern defines no capture groups at all, the whole matched text is used.

    Matches which capture no text at all (for example a pattern made entirely of
    optional groups, none of which participated in the match) are ignored, so an
    empty string is never reported as a version.

    Args:
       string: The domain specific string to scan for a version
       pattern: A compiled regex pattern to scan *string*, or None for the default ``(\\d+)\\.(\\d+)(?:\\.(\\d+))?``.

    Returns:
       The guessed version, or None if no match was found.

    .. note::

       **Specifying a pattern**

       When specifying the pattern, any number of capture groups may be specified, and
       the match containing the most matching groups will be selected.

       The capture groups must contain only the intended result and not any separating
       characters.

       For example, you may parse a string such as ``release-1_2_3-r2`` with the pattern:
       ``(\\d+)_(\\d+)(?:_(\\d+))?(?:\\-(r\\d+))?``, and this would produce the parsed
       version ``1.2.3.r2``.

    **Since: 2.5**.
    """
    if pattern is None:
        pattern = _DEFAULT_GUESS_PATTERN

    best_guess: Optional[str] = None
    best_n_groups = 0

    # Iterate over non-overlapping matches, and prefer a match which is more
    # qualified (i.e. 1.2.3 is better than 1.2).
    #
    # Note that finditer() only ever yields successful match objects, so there
    # is no "failed match" case to handle inside the loop.
    for version_match in pattern.finditer(string):
        if pattern.groups == 0:
            # No capture groups in the pattern: the whole match is the version
            iter_guess = version_match.group(0)
            iter_n_groups = 1
        else:
            # Optional groups which did not participate in the match are None
            iter_groups = [group for group in version_match.groups() if group is not None]
            iter_guess = ".".join(iter_groups)
            iter_n_groups = len(iter_groups)

        # A match which captured nothing is not a usable guess, and must not
        # shadow a later, meaningful match.
        if not iter_guess:
            continue

        # Strictly greater: on a tie the earliest match in the string wins
        if iter_n_groups > best_n_groups:
            best_guess = iter_guess
            best_n_groups = iter_n_groups

    return best_guess
783+
784+
709785
# _get_host_tool_internal():
710786
#
711787
# Get the full path of a host tool, including tools bundled inside the Python package.

tests/frontend/show.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -594,7 +594,7 @@ def test_invalid_alias(cli, tmpdir, datafiles):
594594
(
595595
"tar.bst",
596596
"tar",
597-
"https://flying-ponies.com/releases/pony-flight-1.2.3.tgz",
597+
"https://flying-ponies.com/releases/1.2/pony-flight-1.2.3.tgz",
598598
"remote-file",
599599
"sha256",
600600
"9d0c936c78d0dfe3a67cae372c9a2330476ea87a2eec16b2daada64a664ca501",

tests/frontend/source-info/elements/tar.bst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,5 @@ kind: import
22

33
sources:
44
- kind: tar
5-
url: https://flying-ponies.com/releases/pony-flight-1.2.3.tgz
5+
url: https://flying-ponies.com/releases/1.2/pony-flight-1.2.3.tgz
66
ref: 9d0c936c78d0dfe3a67cae372c9a2330476ea87a2eec16b2daada64a664ca501

0 commit comments

Comments
 (0)