9
9
Splits versions up using common whitespace delimiters and also splits out letters
10
10
so that things like openSSL's 1.1.1y type of version will work too.
11
11
12
- This may need some additional smarts for stuff like "rc" or "beta" and potentially for
13
- things like distro versioning. I don't know yet.
12
+ This handles some pretty strange edge cases. See the test_version_compare.py
13
+ and inline comments for details
14
+
14
15
"""
15
16
16
17
@@ -38,66 +39,25 @@ def parse_version(version_string: str):
38
39
raise UnknownVersion (f"version string = { version_string } " )
39
40
40
41
versionString = version_string .strip ()
41
- versionArray = []
42
42
43
43
# convert all non alpha-numeric characters to be treated like . below
44
44
# we could switch to a re split but it seems to leave blanks so this is less hassle
45
- versionString = re .sub ("[^0-9a-zA-Z]+" , "." , versionString )
46
-
47
45
# Note: This expression may need improvement if we need to handle unicode
46
+ versionString = re .sub ("[^0-9a-zA-Z]+" , "." , versionString )
48
47
49
- # remove any trailing . then split
50
- versionString = versionString .strip ("." )
51
- split_version = versionString .split ("." )
52
-
53
- # if the whole string was numeric then we're done and you can move on
54
- if versionString .isnumeric ():
55
- versionArray = split_version
56
- return versionArray
57
-
58
- # Go through and split up anything like 6a in to 6 and a
59
- number_letter = re .compile ("^([0-9]+)([a-zA-Z]+)$" )
60
- letter_number = re .compile ("^([a-zA-Z]+)([0-9]+)$" )
61
- for section in split_version :
62
- # if it's all letters or all numbers, just add it to the array
63
- if section .isnumeric () or section .isalpha ():
64
- versionArray .append (section )
65
-
66
- # if it looks like 42a split out the letters and numbers
67
- # We will treat 42a as coming *after* version 42.
68
- elif re .match (number_letter , section ):
69
- result = re .findall (number_letter , section )
70
-
71
- # We're expecting a result that looks like [("42", "a")] but let's verify
72
- # and then add it to the array
73
- if len (result ) == 1 and len (result [0 ]) == 2 :
74
- versionArray .append (result [0 ][0 ])
75
- versionArray .append (result [0 ][1 ])
76
- else :
77
- raise CannotParseVersionException (f"version string = { versionString } " )
78
-
79
- # if it looks like rc1 or dev7 we'll leave it together as it may be some kind of pre-release
80
- # and we'll probably want to handle it specially in the compare.
81
- # We need to threat 42dev7 as coming *before* version 42.
82
- elif re .match (letter_number , section ):
83
- versionArray .append (section )
84
-
85
- # It's not a "pure" alpha or number string, it's not something like rc12 or 44g
48
+ # We originally had hash detection in here, but it turns out very few companies
49
+ # use hashes in ranges but more used dates that were getting caught in the same net
50
+ # (see https://github.com/intel/cve-bin-tool/pull/3694 )
51
+ # Hash detection may be useful in the future but it would have to be better defined.
86
52
87
- # It could be a hash, which we can't string compare without knowledge of the product.
88
- # It could also be a distro release string like deb8u5, which we could compare
89
- # but the data may not be useful or usable in context.
90
- else :
91
- # If it's the last part of the version just drop it silently
92
- # we could log these but I suspect it would be very noisy
93
- if section == split_version [len (split_version ) - 1 ]:
94
- pass
53
+ # otherwise, split up letters and numbers into separate units for compare
54
+ versionString = re .sub ("([a-zA-Z]+)" , r".\1." , versionString )
95
55
96
- # if it's not, raise an exception because we should probably examine it
97
- elif versionString != "." :
98
- raise CannotParseVersionException ( f"version string = { versionString } " )
56
+ # Clean up any duplicate . and then split
57
+ versionString = re . sub ( r"\.+" , "." , versionString )
58
+ split_version = versionString . strip ( "." ). split ( ". " )
99
59
100
- return versionArray
60
+ return split_version
101
61
102
62
103
63
def version_compare (v1 : str , v2 : str ):
@@ -106,12 +66,14 @@ def version_compare(v1: str, v2: str):
106
66
107
67
returns 0 if they're the same.
108
68
returns 1 if v1 > v2
109
- returns -1 if v1 < v2findall
110
- n
69
+ returns -1 if v1 < v2
111
70
"""
112
71
v1_array = parse_version (v1 )
113
72
v2_array = parse_version (v2 )
114
73
74
+ # We'll treat the following strings as pre-releases.
75
+ pre_release_words = {"pre" , "rc" , "alpha" , "beta" , "dev" }
76
+
115
77
for i in range (len (v1_array )):
116
78
if len (v2_array ) > i :
117
79
# If it's all numbers, cast to int and compare
@@ -121,46 +83,44 @@ def version_compare(v1: str, v2: str):
121
83
if int (v1_array [i ]) < int (v2_array [i ]):
122
84
return - 1
123
85
124
- # If they're letters just do a string compare, I don't have a better idea
86
+ # If they're letters do a string compare.
125
87
# This might be a bad choice in some cases: Do we want ag < z?
126
88
# I suspect projects using letters in version names may not use ranges in nvd
127
89
# for this reason (e.g. openssl)
128
90
# Converting to lower() so that 3.14a == 3.14A
129
91
# but this may not be ideal in all cases
130
92
elif v1_array [i ].isalpha () and v2_array [i ].isalpha ():
93
+ # allow pre-releases to come before arbitrary letters.
94
+ if (
95
+ v1_array [i ] in pre_release_words
96
+ and v2_array [i ] not in pre_release_words
97
+ ):
98
+ return - 1
99
+ if (
100
+ v1_array [i ] not in pre_release_words
101
+ and v2_array [i ] in pre_release_words
102
+ ):
103
+ return 1
104
+
105
+ # Note that if both are in the pre-release list we alpha compare
131
106
if v1_array [i ].lower () > v2_array [i ].lower ():
132
107
return 1
133
108
if v1_array [i ].lower () < v2_array [i ].lower ():
134
109
return - 1
135
110
136
111
else :
137
112
# They are not the same type, and we're comparing mixed letters and numbers.
138
- # We'll treat letters as less than numbers.
139
- # This will result in things like rc1, dev9, b2 getting treated like pre-releases
140
- # as in https://peps.python.org/pep-0440/
141
- # So 1.2.pre4 would be less than 1.2.1 and (so would 1.2.post1)
113
+ # We treat letters as less than numbers.
114
+
115
+ # This may cause false positives with some distro numbers
116
+ # e.g. 1.4.ubuntu8 may have fixed some issues in 1.4,
117
+ # But since we can't be sure we'll return the 'safer' result
118
+ # and let users triage themselves.
142
119
if v1_array [i ].isalnum () and v2_array [i ].isnumeric ():
143
120
return - 1
144
121
elif v1_array [i ].isnumeric () and v2_array [i ].isalnum ():
145
122
return 1
146
123
147
- # They're both of type letter567 and we'll convert them to be letter.567 and
148
- # run them through the compare function again
149
- # We will be dictionary comparing so that 4.alpha4 < 4.beta1
150
- # but this also means .dev3 < .rc4 (because d is before r)
151
- # which may make less sense depending on the project.
152
- letter_number = re .compile ("^[a-zA-Z]+[0-9]+$" )
153
- if re .match (letter_number , v1_array [i ]) and re .match (
154
- letter_number , v2_array [i ]
155
- ):
156
- v1_letter_number = re .sub (
157
- "([a-zA-Z]+)([0-9]+)" , r"\1.\2" , v1_array [i ]
158
- )
159
- v2_letter_number = re .sub (
160
- "([a-zA-Z]+)([0-9]+)" , r"\1.\2" , v2_array [i ]
161
- )
162
- return version_compare (v1_letter_number , v2_letter_number )
163
-
164
124
# And if all else fails, just compare the strings
165
125
if v1_array [i ] > v2_array [i ]:
166
126
return 1
@@ -171,7 +131,7 @@ def version_compare(v1: str, v2: str):
171
131
# v1 has more digits than v2
172
132
# Check to see if v1's something that looks like a pre-release (a2, dev8, rc4)
173
133
# e.g. 4.5.a1 would be less than 4.5
174
- if re . match ( "([a-zA-Z]+)([0-9]+)" , v1_array [i ]) :
134
+ if v1_array [i ] in pre_release_words :
175
135
return - 1
176
136
177
137
# Otherwise, v1 has more digits than v2 and the previous ones matched,
@@ -185,9 +145,9 @@ def version_compare(v1: str, v2: str):
185
145
if v2_array [len (v1_array )].startswith ("post" ):
186
146
return - 1
187
147
188
- # if what's in v2 next looks like a pre-release number (e.g. a2, dev8, rc4) then we'll
148
+ # if what's in v2 next looks like a pre-release then we'll
189
149
# claim v1 is still bigger, otherwise we'll say v2 is.
190
- if re . match ( "([0-9]+)([a-zA-Z]+)" , v2_array [len (v1_array )]) :
150
+ if v2_array [len (v1_array )] in pre_release_words :
191
151
return 1
192
152
193
153
return - 1
@@ -232,4 +192,4 @@ def __ne__(self, other):
232
192
233
193
def __repr__ (self ):
234
194
"""print the version string"""
235
- return f"Version: { self } "
195
+ return f"Version: { self } aka { parse_version ( self ) } "
0 commit comments