99Splits versions up using common whitespace delimiters and also splits out letters
1010so that things like openSSL's 1.1.1y type of version will work too.
1111
12- This may need some additional smarts for stuff like "rc" or "beta" and potentially for
13- things like distro versioning. I don't know yet.
12+ This handles some pretty strange edge cases. See the test_version_compare.py
13+ and inline comments for details
14+
1415"""
1516
1617
@@ -38,66 +39,25 @@ def parse_version(version_string: str):
3839 raise UnknownVersion (f"version string = { version_string } " )
3940
4041 versionString = version_string .strip ()
41- versionArray = []
4242
4343 # convert all non alpha-numeric characters to be treated like . below
4444 # we could switch to a re split but it seems to leave blanks so this is less hassle
45- versionString = re .sub ("[^0-9a-zA-Z]+" , "." , versionString )
46-
4745 # Note: This expression may need improvement if we need to handle unicode
46+ versionString = re .sub ("[^0-9a-zA-Z]+" , "." , versionString )
4847
49- # remove any trailing . then split
50- versionString = versionString .strip ("." )
51- split_version = versionString .split ("." )
52-
53- # if the whole string was numeric then we're done and you can move on
54- if versionString .isnumeric ():
55- versionArray = split_version
56- return versionArray
57-
58- # Go through and split up anything like 6a in to 6 and a
59- number_letter = re .compile ("^([0-9]+)([a-zA-Z]+)$" )
60- letter_number = re .compile ("^([a-zA-Z]+)([0-9]+)$" )
61- for section in split_version :
62- # if it's all letters or all numbers, just add it to the array
63- if section .isnumeric () or section .isalpha ():
64- versionArray .append (section )
65-
66- # if it looks like 42a split out the letters and numbers
67- # We will treat 42a as coming *after* version 42.
68- elif re .match (number_letter , section ):
69- result = re .findall (number_letter , section )
70-
71- # We're expecting a result that looks like [("42", "a")] but let's verify
72- # and then add it to the array
73- if len (result ) == 1 and len (result [0 ]) == 2 :
74- versionArray .append (result [0 ][0 ])
75- versionArray .append (result [0 ][1 ])
76- else :
77- raise CannotParseVersionException (f"version string = { versionString } " )
78-
79- # if it looks like rc1 or dev7 we'll leave it together as it may be some kind of pre-release
80- # and we'll probably want to handle it specially in the compare.
81- # We need to threat 42dev7 as coming *before* version 42.
82- elif re .match (letter_number , section ):
83- versionArray .append (section )
84-
85- # It's not a "pure" alpha or number string, it's not something like rc12 or 44g
48+ # We originally had hash detection in here, but it turns out very few companies
49+ # use hashes in ranges but more used dates that were getting caught in the same net
50+ # (see https://github.com/intel/cve-bin-tool/pull/3694 )
51+ # Hash deteciton may be useful in the future but it would have to be better defined.
8652
87- # It could be a hash, which we can't string compare without knowledge of the product.
88- # It could also be a distro release string like deb8u5, which we could compare
89- # but the data may not be useful or usable in context.
90- else :
91- # If it's the last part of the version just drop it silently
92- # we could log these but I suspect it would be very noisy
93- if section == split_version [len (split_version ) - 1 ]:
94- pass
53+ # otherwise, split up letters and numbers into separate units for compare
54+ versionString = re .sub ("([a-zA-Z]+)" , r".\1." , versionString )
9555
96- # if it's not, raise an exception because we should probably examine it
97- elif versionString != "." :
98- raise CannotParseVersionException ( f"version string = { versionString } " )
56+ # Clean up any duplicate . and then split
57+ versionString = re . sub ( r"\.+" , "." , versionString )
58+ split_version = versionString . strip ( "." ). split ( ". " )
9959
100- return versionArray
60+ return split_version
10161
10262
10363def version_compare (v1 : str , v2 : str ):
@@ -106,12 +66,14 @@ def version_compare(v1: str, v2: str):
10666
10767 returns 0 if they're the same.
10868 returns 1 if v1 > v2
109- returns -1 if v1 < v2findall
110- n
69+ returns -1 if v1 < v2
11170 """
11271 v1_array = parse_version (v1 )
11372 v2_array = parse_version (v2 )
11473
74+ # We'll treat the following strings as pre-releases.
75+ pre_release_words = {"pre" , "rc" , "alpha" , "beta" , "dev" }
76+
11577 for i in range (len (v1_array )):
11678 if len (v2_array ) > i :
11779 # If it's all numbers, cast to int and compare
@@ -121,46 +83,44 @@ def version_compare(v1: str, v2: str):
12183 if int (v1_array [i ]) < int (v2_array [i ]):
12284 return - 1
12385
124- # If they're letters just do a string compare, I don't have a better idea
86+ # If they're letters do a string compare.
12587 # This might be a bad choice in some cases: Do we want ag < z?
12688 # I suspect projects using letters in version names may not use ranges in nvd
12789 # for this reason (e.g. openssl)
12890 # Converting to lower() so that 3.14a == 3.14A
12991 # but this may not be ideal in all cases
13092 elif v1_array [i ].isalpha () and v2_array [i ].isalpha ():
93+ # allow pre-releases to come before arbitrary letters.
94+ if (
95+ v1_array [i ] in pre_release_words
96+ and v2_array [i ] not in pre_release_words
97+ ):
98+ return - 1
99+ if (
100+ v1_array [i ] not in pre_release_words
101+ and v2_array [i ] in pre_release_words
102+ ):
103+ return 1
104+
105+ # Note that if both are in the pre-release list we alpha compare
131106 if v1_array [i ].lower () > v2_array [i ].lower ():
132107 return 1
133108 if v1_array [i ].lower () < v2_array [i ].lower ():
134109 return - 1
135110
136111 else :
137112 # They are not the same type, and we're comparing mixed letters and numbers.
138- # We'll treat letters as less than numbers.
139- # This will result in things like rc1, dev9, b2 getting treated like pre-releases
140- # as in https://peps.python.org/pep-0440/
141- # So 1.2.pre4 would be less than 1.2.1 and (so would 1.2.post1)
113+ # We treat letters less than numbers
114+
115+ # This may cause false positives with some distro numbers
116+ # e.g. 1.4.ubuntu8 may have fixed some issues in 1.4,
117+ # But since we can't be sure we'll return the 'safer' result
118+ # and let users triage themselves.
142119 if v1_array [i ].isalnum () and v2_array [i ].isnumeric ():
143120 return - 1
144121 elif v1_array [i ].isnumeric () and v2_array [i ].isalnum ():
145122 return 1
146123
147- # They're both of type letter567 and we'll convert them to be letter.567 and
148- # run them through the compare function again
149- # We will be dictionary comparing so that 4.alpha4 < 4.beta1
150- # but this also means .dev3 < .rc4 (because d is before r)
151- # which may make less sense depending on the project.
152- letter_number = re .compile ("^[a-zA-Z]+[0-9]+$" )
153- if re .match (letter_number , v1_array [i ]) and re .match (
154- letter_number , v2_array [i ]
155- ):
156- v1_letter_number = re .sub (
157- "([a-zA-Z]+)([0-9]+)" , r"\1.\2" , v1_array [i ]
158- )
159- v2_letter_number = re .sub (
160- "([a-zA-Z]+)([0-9]+)" , r"\1.\2" , v2_array [i ]
161- )
162- return version_compare (v1_letter_number , v2_letter_number )
163-
164124 # And if all else fails, just compare the strings
165125 if v1_array [i ] > v2_array [i ]:
166126 return 1
@@ -171,7 +131,7 @@ def version_compare(v1: str, v2: str):
171131 # v1 has more digits than v2
172132 # Check to see if v1's something that looks like a pre-release (a2, dev8, rc4)
173133 # e.g. 4.5.a1 would be less than 4.5
174- if re . match ( "([a-zA-Z]+)([0-9]+)" , v1_array [i ]) :
134+ if v1_array [i ] in pre_release_words :
175135 return - 1
176136
177137 # Otherwise, v1 has more digits than v2 and the previous ones matched,
@@ -185,9 +145,9 @@ def version_compare(v1: str, v2: str):
185145 if v2_array [len (v1_array )].startswith ("post" ):
186146 return - 1
187147
188- # if what's in v2 next looks like a pre-release number (e.g. a2, dev8, rc4) then we'll
148+ # if what's in v2 next looks like a pre-release then we'll
189149 # claim v1 is still bigger, otherwise we'll say v2 is.
190- if re . match ( "([0-9]+)([a-zA-Z]+)" , v2_array [len (v1_array )]) :
150+ if v2_array [len (v1_array )] in pre_release_words :
191151 return 1
192152
193153 return - 1
@@ -232,4 +192,4 @@ def __ne__(self, other):
232192
233193 def __repr__ (self ):
234194 """print the version string"""
235- return f"Version: { self } "
195+ return f"Version: { self } aka { parse_version ( self ) } "
0 commit comments