Skip to content

Commit 9a95eca

Browse files
authored
fix: create new version comparison function (#3470)
* fix: create new version comparison function We had been using packaging's version parsing tools, but as they move more towards pep440 compliance they aren't as useful for comparing arbitrary versions that may not follow the same scheme. This moves us to our own function. It may need some further tweaking for special cases such as release candidates or dev versions. Signed-off-by: Terri Oda <[email protected]>
1 parent 94bbae5 commit 9a95eca

File tree

6 files changed

+279
-132
lines changed

6 files changed

+279
-132
lines changed

cve_bin_tool/cve_scanner.py

Lines changed: 8 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,14 @@
11
# Copyright (C) 2021 Intel Corporation
22
# SPDX-License-Identifier: GPL-3.0-or-later
33

4-
import re
54
import sqlite3
65
import sys
76
from collections import defaultdict
87
from logging import Logger
98
from pathlib import Path
109
from string import ascii_lowercase
11-
from typing import DefaultDict, Dict, List, Tuple
10+
from typing import DefaultDict, Dict, List
1211

13-
from packaging.version import Version
14-
from packaging.version import parse as parse_version
1512
from rich.console import Console
1613

1714
from cve_bin_tool.cvedb import DBNAME, DISK_LOCATION_DEFAULT
@@ -20,6 +17,7 @@
2017
from cve_bin_tool.log import LOGGER
2118
from cve_bin_tool.theme import cve_theme
2219
from cve_bin_tool.util import CVE, CVEData, ProductInfo, VersionInfo
20+
from cve_bin_tool.version_compare import Version
2321

2422

2523
class CVEScanner:
@@ -99,12 +97,8 @@ def get_cves(self, product_info: ProductInfo, triage_data: TriageData):
9997
# Removing * from vendors that are guessed by the package list parser
10098
vendor = product_info.vendor.replace("*", "")
10199

102-
# Need to manipulate version to ensure canonical form of version
103-
104-
parsed_version, parsed_version_between = self.canonical_convert(product_info)
105-
# If canonical form of version numbering not found, exit
106-
if parsed_version == "UNKNOWN":
107-
return
100+
# Use our Version class to do version compares
101+
parsed_version = Version(product_info.version)
108102

109103
self.cursor.execute(query, [vendor, product_info.product, str(parsed_version)])
110104

@@ -133,29 +127,17 @@ def get_cves(self, product_info: ProductInfo, triage_data: TriageData):
133127
version_end_excluding,
134128
) = cve_range
135129

136-
# pep-440 doesn't include versions of the type 1.1.0g used by openssl
137-
# or versions of the type 9a used by libjpeg
138-
# so if this is openssl or libjpeg, convert the last letter to a .number
139-
if product_info.product in {"openssl", "libjpeg"}:
140-
# if last character is a letter, convert it to .number
141-
# version = self.letter_convert(product_info.version)
142-
version_start_including = self.letter_convert(version_start_including)
143-
version_start_excluding = self.letter_convert(version_start_excluding)
144-
version_end_including = self.letter_convert(version_end_including)
145-
version_end_excluding = self.letter_convert(version_end_excluding)
146-
parsed_version = parsed_version_between
147-
148130
# check the start range
149131
passes_start = False
150132
if (
151133
version_start_including is not self.RANGE_UNSET
152-
and parsed_version >= parse_version(version_start_including)
134+
and parsed_version >= Version(version_start_including)
153135
):
154136
passes_start = True
155137

156138
if (
157139
version_start_excluding is not self.RANGE_UNSET
158-
and parsed_version > parse_version(version_start_excluding)
140+
and parsed_version > Version(version_start_excluding)
159141
):
160142
passes_start = True
161143

@@ -170,13 +152,13 @@ def get_cves(self, product_info: ProductInfo, triage_data: TriageData):
170152
passes_end = False
171153
if (
172154
version_end_including is not self.RANGE_UNSET
173-
and parsed_version <= parse_version(version_end_including)
155+
and parsed_version <= Version(version_end_including)
174156
):
175157
passes_end = True
176158

177159
if (
178160
version_end_excluding is not self.RANGE_UNSET
179-
and parsed_version < parse_version(version_end_excluding)
161+
and parsed_version < Version(version_end_excluding)
180162
):
181163
passes_end = True
182164

@@ -313,57 +295,6 @@ def get_cves(self, product_info: ProductInfo, triage_data: TriageData):
313295
if product_info not in self.all_product_data:
314296
self.all_product_data[product_info] = len(cves)
315297

316-
def letter_convert(self, version: str) -> str:
317-
"""pkg_resources follows pep-440 which doesn't expect openssl style 1.1.0g version numbering
318-
or libjpeg style 9a version numbering
319-
So to fake it, if the last character is a letter, replace it with .number before comparing
320-
"""
321-
if not version: # if version is empty return it.
322-
return version
323-
324-
# Check for short string
325-
if len(version) < 2:
326-
return version
327-
328-
last_char = version[-1]
329-
second_last_char = version[-2]
330-
331-
if last_char in self.ALPHA_TO_NUM and second_last_char in self.ALPHA_TO_NUM:
332-
version = f"{version[:-2]}.{self.ALPHA_TO_NUM[second_last_char]}.{self.ALPHA_TO_NUM[last_char]}"
333-
334-
elif last_char in self.ALPHA_TO_NUM:
335-
version = f"{version[:-1]}.{self.ALPHA_TO_NUM[last_char]}"
336-
return version
337-
338-
VersionType = Version
339-
340-
def canonical_convert(
341-
self, product_info: ProductInfo
342-
) -> Tuple[VersionType, VersionType]:
343-
version_between = parse_version("")
344-
if product_info.version == "":
345-
return parse_version(product_info.version), version_between
346-
if product_info.product in {"openssl", "libjpeg"}:
347-
pv = re.search(r"\d[.\d]*[a-z]?", product_info.version)
348-
version_between = parse_version(self.letter_convert(pv.group(0)))
349-
else:
350-
# Ensure canonical form of version numbering
351-
if ":" in product_info.version:
352-
# Handle x:a.b<string> e.g. 2:7.4+23
353-
components = product_info.version.split(":")
354-
pv = re.search(r"\d[.\d]*", components[1])
355-
else:
356-
# Handle a.b.c<string> e.g. 1.20.9rel1
357-
pv = re.search(r"\d[.\d]*", product_info.version)
358-
if pv is None:
359-
parsed_version = "UNKNOWN"
360-
self.logger.warning(
361-
f"error parsing {product_info.vendor}.{product_info.product} v{product_info.version} - manual inspection required"
362-
)
363-
else:
364-
parsed_version = parse_version(pv.group(0))
365-
return parsed_version, version_between
366-
367298
def affected(self):
368299
"""Returns list of vendor.product and version tuples identified from
369300
scan"""

cve_bin_tool/version_compare.py

Lines changed: 219 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,219 @@
1+
# Copyright (C) 2023 Intel Corporation
2+
# SPDX-License-Identifier: GPL-3.0-or-later
3+
4+
import re
5+
6+
"""
7+
A class for comparing arbitrary versions of products.
8+
9+
Splits versions up using common whitespace delimiters and also splits out letters
10+
so that things like openSSL's 1.1.1y type of version will work too.
11+
12+
This may need some additional smarts for stuff like "rc" or "beta" and potentially for
13+
things like distro versioning. I don't know yet.
14+
"""
15+
16+
17+
class CannotParseVersionException(Exception):
18+
"""
19+
Thrown if the version doesn't comply with our expectations
20+
"""
21+
22+
23+
class UnknownVersion(Exception):
24+
"""
25+
Thrown if version is null or "unknown".
26+
"""
27+
28+
29+
def parse_version(version_string: str):
30+
"""
31+
Splits a version string into an array for comparison.
32+
This includes dealing with some letters.
33+
34+
e.g. 1.1.1a would become [1, 1, 1, a]
35+
"""
36+
37+
if not version_string or version_string.lower() == "unknown":
38+
raise UnknownVersion(f"version string = {version_string}")
39+
40+
versionString = version_string.strip()
41+
versionArray = []
42+
43+
# convert - and _ to be treated like . below
44+
# we could switch to a re split but it seems to leave blanks so this is less hassle
45+
versionString = versionString.replace("-", ".")
46+
versionString = versionString.replace("_", ".")
47+
# Note: there may be other non-alphanumeric characters we want to add here in the
48+
# future, but we'd like to look at those cases before adding them in case the version
49+
# logic is very different.
50+
51+
# Attempt a split
52+
split_version = versionString.split(".")
53+
54+
# if the whole string was numeric then we're done and you can move on
55+
if versionString.isnumeric():
56+
versionArray = split_version
57+
return versionArray
58+
59+
# Go through and split up anything like 6a in to 6 and a
60+
number_letter = re.compile("([0-9]+)([a-zA-Z]+)")
61+
letter_number = re.compile("([a-zA-Z]+)([0-9]+)")
62+
for section in split_version:
63+
# if it's all letters or all nubmers, just add it to the array
64+
if section.isnumeric() or section.isalpha():
65+
versionArray.append(section)
66+
67+
# if it looks like 42a split out the letters and numbers
68+
# We will treat 42a as coming *after* version 42.
69+
elif re.match(number_letter, section):
70+
result = re.findall(number_letter, section)
71+
72+
# We're expecting a result that looks like [("42", "a")] but let's verify
73+
# and then add it to the array
74+
if len(result) == 1 and len(result[0]) == 2:
75+
versionArray.append(result[0][0])
76+
versionArray.append(result[0][1])
77+
else:
78+
raise CannotParseVersionException(f"version string = {versionString}")
79+
80+
# if it looks like rc1 or dev7 we'll leave it together as it may be some kind of pre-release
81+
# and we'll probably want to handle it specially in the compare.
82+
# We need to threat 42dev7 as coming *before* version 42.
83+
elif re.match(letter_number, section):
84+
versionArray.append(section)
85+
86+
# If all else fails, complain
87+
else:
88+
if versionString != ".":
89+
raise CannotParseVersionException(f"version string = {versionString}")
90+
91+
return versionArray
92+
93+
94+
def version_compare(v1: str, v2: str):
95+
"""
96+
Compare two versions by converting them to arrays
97+
98+
returns 0 if they're the same.
99+
returns 1 if v1 > v2
100+
returns -1 if v1 < v2findall
101+
n
102+
"""
103+
v1_array = parse_version(v1)
104+
v2_array = parse_version(v2)
105+
106+
for i in range(len(v1_array)):
107+
if len(v2_array) > i:
108+
# If it's all numbers, cast to int and compare
109+
if v1_array[i].isnumeric() and v2_array[i].isnumeric():
110+
if int(v1_array[i]) > int(v2_array[i]):
111+
return 1
112+
if int(v1_array[i]) < int(v2_array[i]):
113+
return -1
114+
115+
# If they're letters just do a string compare, I don't have a better idea
116+
# This might be a bad choice in some cases: Do we want ag < z?
117+
# I suspect projects using letters in version names may not use ranges in nvd
118+
# for this reason (e.g. openssl)
119+
# Converting to lower() so that 3.14a == 3.14A
120+
# but this may not be ideal in all cases
121+
elif v1_array[i].isalpha() and v2_array[i].isalpha():
122+
if v1_array[i].lower() > v2_array[i].lower():
123+
return 1
124+
if v1_array[i].lower() < v2_array[i].lower():
125+
return -1
126+
127+
else:
128+
# They are not the same type, and we're comparing mixed letters and numbers.
129+
# We'll treat letters as less than numbers.
130+
# This will result in things like rc1, dev9, b2 getting treated like pre-releases
131+
# as in https://peps.python.org/pep-0440/
132+
# So 1.2.pre4 would be less than 1.2.1 and (so would 1.2.post1)
133+
if v1_array[i].isalnum() and v2_array[i].isnumeric():
134+
return -1
135+
elif v1_array[i].isnumeric() and v2_array[i].isalnum():
136+
return 1
137+
138+
# They're both of type letter567 and we'll convert them to be letter.567 and
139+
# run them through the compare function again
140+
# Honestly it's hard to guess if .dev3 is going to be more or less than .rc4
141+
# unless you know the project, so hopefully people don't expect that kind of range
142+
# matching
143+
v1_newstring = re.sub("([a-zA-Z]+)([0-9]+)", r"\1.\2", v1_array[i])
144+
v2_newstring = re.sub("([a-zA-Z]+)([0-9]+)", r"\1.\2", v2_array[i])
145+
print(f"`{v1_newstring}` and `{v2_newstring}`")
146+
return version_compare(v1_newstring, v2_newstring)
147+
148+
# And if all else fails, just compare the strings
149+
if v1_array[i] > v2_array[i]:
150+
return 1
151+
if v1_array[i] < v2_array[i]:
152+
return -1
153+
154+
else:
155+
# v1 has more digits than v2
156+
# Check to see if v1's something that looks like a pre-release (a2, dev8, rc4)
157+
# e.g. 4.5.a1 would be less than 4.5
158+
if re.match("([a-zA-Z]+)([0-9]+)", v1_array[i]):
159+
return -1
160+
161+
# Otherwise, v1 has more digits than v2 and the previous ones matched,
162+
# so it's probably later. e.g. 1.2.3 amd 1.2.q are both > 1.2
163+
return 1
164+
165+
# if we made it this far and they've matched, see if there's more stuff in v2
166+
# e.g. 1.2.3 or 1.2a comes after 1.2
167+
if len(v2_array) > len(v1_array):
168+
# special case: if v2 declares itself a post-release, we'll say it's bigger than v1
169+
if v2_array[len(v1_array)].startswith("post"):
170+
return -1
171+
172+
# if what's in v2 next looks like a pre-release number (e.g. a2, dev8, rc4) then we'll
173+
# claim v1 is still bigger, otherwise we'll say v2 is.
174+
if re.match("([0-9]+)([a-zA-Z]+)", v2_array[len(v1_array)]):
175+
return 1
176+
177+
return -1
178+
179+
return 0
180+
181+
182+
class Version(str):
183+
"""
184+
A class to make version comparisons look more pretty:
185+
186+
Version("1.2") > Version("1.1")
187+
"""
188+
189+
def __cmp__(self, other):
190+
"""compare"""
191+
return version_compare(self, other)
192+
193+
def __lt__(self, other):
194+
"""<"""
195+
return bool(version_compare(self, other) < 0)
196+
197+
def __le__(self, other):
198+
"""<="""
199+
return bool(version_compare(self, other) <= 0)
200+
201+
def __gt__(self, other):
202+
""">"""
203+
return bool(version_compare(self, other) > 0)
204+
205+
def __ge__(self, other):
206+
""">="""
207+
return bool(version_compare(self, other) >= 0)
208+
209+
def __eq__(self, other):
210+
"""=="""
211+
return bool(version_compare(self, other) == 0)
212+
213+
def __ne__(self, other):
214+
"""!="""
215+
return bool(version_compare(self, other) != 0)
216+
217+
def __repr__(self):
218+
"""print the version string"""
219+
return f"Version: {self}"

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ jsonschema>=3.0.2
1111
lib4sbom>=0.5.0
1212
python-gnupg
1313
packageurl-python
14-
packaging<22.0
14+
packaging
1515
plotly
1616
pyyaml>=5.4
1717
requests

test/test_csv2cve.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ async def test_csv2cve_valid_file(self, caplog):
3131

3232
for cve_count, product in [
3333
[60, "haxx.curl version 7.34.0"],
34-
[10, "mit.kerberos_5 version 1.15.1"],
34+
[9, "mit.kerberos_5 version 1.15.1"],
3535
]:
3636
retrieved_cve_count = 0
3737
for captured_line in caplog.record_tuples:

0 commit comments

Comments
 (0)