Skip to content

Commit 3743b62

Browse files
authored
Merge pull request #33 from nexB/name-version-from-file
Add functions to extract name/version of filename
2 parents f279238 + ae536d2 commit 3743b62

File tree

1 file changed

+225
-1
lines changed

1 file changed

+225
-1
lines changed

src/commoncode/version.py

Lines changed: 225 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
#
88

99
import re
10+
from collections import namedtuple
11+
from os import path
1012

1113

1214
def VERSION_PATTERNS_REGEX():
@@ -37,7 +39,7 @@ def VERSION_PATTERNS_REGEX():
3739

3840
def hint(path):
3941
"""
40-
Return a version found in a path or None. Prefix the version with 'v ' if
42+
Return a version found in a ``path`` or None. Prefix the version with 'v ' if
4143
the version does not start with v.
4244
"""
4345
for pattern in VERSION_PATTERNS_REGEX():
@@ -54,3 +56,225 @@ def hint(path):
5456
if not v.lower().startswith('v'):
5557
v = f'v {v}'
5658
return v
59+
60+
61+
def is_dot_num(s):
    """
    Return True if a version string `s` is semver-like and composed only of
    dot-separated runs of ASCII digits such as "1", "1.2" or "10.0.3".

    Return False for an empty string and for a string with a leading dot, a
    trailing dot or an empty segment (two consecutive dots).

    >>> is_dot_num("1.2.3")
    True
    >>> is_dot_num("")
    False
    >>> is_dot_num("1..2")
    False
    >>> is_dot_num("1.2.")
    False
    """
    # A full match requires at least one digit and exactly one dot between
    # each pair of digit runs, which also rules out the empty string.
    return re.fullmatch(r"[0-9]+(?:\.[0-9]+)*", s) is not None
67+
68+
69+
# Lowercase qualifiers commonly found at the end of a version string.
common_version_suffixes = (
    "final", "release", "snapshot",
    "jre", "android",
    "pre", "alpha", "beta", "rc",
)
# The same qualifiers, each prefixed with a dash as in "30.1-jre".
common_dash_version_suffixes = tuple("-" + suffix for suffix in common_version_suffixes)
81+
82+
83+
def is_moslty_num(s):
    """
    Return True if a version string `s` is made mostly of digits, with only a
    minority of letters.

    Leading "v" or "V" characters of each dot-separated segment are ignored,
    and a run of letters that is exactly one of the common version suffixes
    (compared in lowercase) is not counted as letters.

    >>> is_moslty_num("v11r2")
    True
    """
    segments = s.split(".")
    # True only when the whole first segment, minus leading "v"s, is digits.
    leading_is_numeric = segments[0].lstrip("vV").isdigit()

    digits = 0
    letters = 0
    for segment in segments:
        # The capturing group keeps the digit and letter runs in the result.
        for token in re.split("([0-9]+|[a-zA-Z]+)", segment.lstrip("vV")):
            if token.isdigit():
                digits += len(token)
                continue
            if token.isalpha() and token.lower() not in common_version_suffixes:
                letters += len(token)

    if not letters:
        # Without counted letters this is a version only if it has digits.
        return digits > 0

    # Accept more than twice as many digits as letters, or simply more digits
    # than letters when the first dot segment is all digits.
    return 2 * letters < digits or (leading_is_numeric and letters < digits)
118+
119+
120+
# Pair of package name and version strings returned by the get_*_nv functions.
NameVersion = namedtuple("NameVersion", ["name", "version"])
121+
122+
123+
def _is_version_like(version):
    """
    Return True if the `version` string is dots and numbers only, is mostly
    numeric, or ends with one of the common version suffixes (ignoring case).
    """
    return (
        is_dot_num(version)
        or is_moslty_num(version)
        or version.lower().endswith(common_version_suffixes)
    )


def get_jar_nv(filename):
    """
    Return a NameVersion tuple parsed from the JAR `filename` or None if
    `filename` does not end with ".jar". The version is None when no version
    is found in the filename.

    For example::
    >>> get_jar_nv('org.eclipse.persistence.antlr_3.2.0.v201302191141.jar')
    NameVersion(name='org.eclipse.persistence.antlr', version='3.2.0.v201302191141')
    >>> get_jar_nv('org.eclipse.persistence.antlr.jar')
    NameVersion(name='org.eclipse.persistence.antlr', version=None)
    >>> get_jar_nv('org.eclipse.persistence.core_2.4.2.v20130514-5956486.jar')
    NameVersion(name='org.eclipse.persistence.core', version='2.4.2.v20130514-5956486')

    >>> get_jar_nv('com.io7m.jareas.checkstyle-0.2.2.jar')
    NameVersion(name='com.io7m.jareas.checkstyle', version='0.2.2')

    >>> get_jar_nv('ant-contrib-1.0b3.jar')
    NameVersion(name='ant-contrib', version='1.0b3')
    >>> get_jar_nv('xpp3-1.1.3.4.C.jar')
    NameVersion(name='xpp3', version='1.1.3.4.C')
    >>> get_jar_nv('ojdbc6_v11r2.jar')
    NameVersion(name='ojdbc6', version='v11r2')

    >>> get_jar_nv('amazon-sqs-java-messaging-lib-1.0.8.jar')
    NameVersion(name='amazon-sqs-java-messaging-lib', version='1.0.8')
    >>> get_jar_nv('annotations-4.1.1.4.jar')
    NameVersion(name='annotations', version='4.1.1.4')
    >>> get_jar_nv('aws-swf-build-tools-1.10.jar')
    NameVersion(name='aws-swf-build-tools', version='1.10')
    >>> get_jar_nv('c3p0-0.9.1.1.jar')
    NameVersion(name='c3p0', version='0.9.1.1')
    >>> get_jar_nv('javax.persistence_2.0.5.v201212031355.jar')
    NameVersion(name='javax.persistence', version='2.0.5.v201212031355')
    >>> get_jar_nv('proto-google-cloud-pubsub-v1-1.95.4.jar')
    NameVersion(name='proto-google-cloud-pubsub-v1', version='1.95.4')

    >>> get_jar_nv('xpp3-1.1.4c.jar')
    NameVersion(name='xpp3', version='1.1.4c')

    >>> get_jar_nv('listenablefuture-9999.0-empty-to-avoid-conflict-with-guava.jar')
    NameVersion(name='listenablefuture-9999.0-empty-to-avoid-conflict-with-guava', version=None)

    >>> get_jar_nv('aspectjweaver.jar')
    NameVersion(name='aspectjweaver', version=None)
    >>> get_jar_nv('flyway-client.jar')
    NameVersion(name='flyway-client', version=None)
    >>> get_jar_nv('jakarta.xml.bind-api.jar')
    NameVersion(name='jakarta.xml.bind-api', version=None)
    >>> get_jar_nv('javax.enterprise.concurrent.jar')
    NameVersion(name='javax.enterprise.concurrent', version=None)

    >>> get_jar_nv('netty-codec-http-4.1.53.Final.jar')
    NameVersion(name='netty-codec-http', version='4.1.53.Final')
    >>> get_jar_nv('spring-context-3.0.7.RELEASE.jar')
    NameVersion(name='spring-context', version='3.0.7.RELEASE')

    >>> get_jar_nv('guava-30.1-jre.jar')
    NameVersion(name='guava', version='30.1-jre')
    >>> get_jar_nv('guava-30.1.1-android.jar')
    NameVersion(name='guava', version='30.1.1-android')

    The dash suffix is matched ignoring case and its original case is kept:
    >>> get_jar_nv('commons-io-2.7-SNAPSHOT.jar')
    NameVersion(name='commons-io', version='2.7-SNAPSHOT')

    >>> get_jar_nv('guava-30.1.1-android.foo')

    """
    if not filename.endswith(".jar"):
        return None

    basename, _extension = path.splitext(filename)

    # JAR name/version come in many flavors
    # amazon-sqs-java-messaging-lib-1.0.8.jar is a plain name-ver
    if "_" in basename:
        # org.eclipse.persistence.antlr_3.2.0.v201302191141.jar
        name, _, version = basename.rpartition("_")
        # an empty name means the split did not find a real name/version pair
        if name and _is_version_like(version):
            return NameVersion(name, version)

    if "-" in basename:
        # amazon-sqs-java-messaging-lib-1.0.8.jar
        dashname = basename
        suffix = ""
        # Set aside a trailing "-<suffix>" such as "-jre" or "-SNAPSHOT" so
        # that the version proper is the last dash segment. Compare in
        # lowercase like the version check does, but keep the original case
        # of the suffix in the returned version.
        if dashname.lower().endswith(common_dash_version_suffixes):
            dashname, _, suff = dashname.rpartition("-")
            suffix = f"-{suff}"

        name, _, version = dashname.rpartition("-")
        if name and _is_version_like(version):
            return NameVersion(name, f"{version}{suffix}")

    # no usable dash or underscore split means no version:
    # org.eclipse.persistence.antlr.jar
    return NameVersion(basename, None)
223+
224+
225+
def get_nupkg_nv(filename):
    """
    Return a NameVersion tuple parsed from the .nupkg NuGet archive `filename`
    or None if `filename` does not end with ".nupkg". The version is None when
    no dot-separated segment of the filename is all digits.

    For example (taken from https://stackoverflow.com/questions/51662737/regex-to-parse-package-name-and-version-number-from-nuget-package-filenames/51662926):
    >>> get_nupkg_nv('knockoutjs.3.4.2.nupkg')
    NameVersion(name='knockoutjs', version='3.4.2')
    >>> get_nupkg_nv('log4net.2.0.8.nupkg')
    NameVersion(name='log4net', version='2.0.8')

    >>> get_nupkg_nv('runtime.tizen.4.0.0-armel.microsoft.netcore.jit.2.0.0.nupkg')
    NameVersion(name='runtime.tizen.4.0.0-armel.microsoft.netcore.jit', version='2.0.0')
    >>> get_nupkg_nv('nuget.core.2.7.0-alpha.nupkg')
    NameVersion(name='nuget.core', version='2.7.0-alpha')

    >>> get_nupkg_nv('microsoft.identitymodel.6.1.7600.16394.nupkg')
    NameVersion(name='microsoft.identitymodel', version='6.1.7600.16394')

    >>> get_nupkg_nv('newtonsoft.json.nupkg')
    NameVersion(name='newtonsoft.json', version=None)

    >>> get_nupkg_nv('guava.30.1.1.foo')
    """
    if not filename.endswith(".nupkg"):
        return None

    basename, _extension = path.splitext(filename)
    dot_segments = basename.split(".")

    # Either the last 4 or the last 3 segments are all digits, in which case
    # they are the version, provided at least one segment is left for the name.
    for size in (4, 3):
        tail = dot_segments[-size:]
        if len(dot_segments) > size and all(seg.isdigit() for seg in tail):
            return NameVersion(".".join(dot_segments[:-size]), ".".join(tail))

    # Otherwise the version is everything from the first all-digit segment,
    # starting from the left; with no such segment there is no version.
    first_digit = next(
        (i for i, seg in enumerate(dot_segments) if seg.isdigit()),
        len(dot_segments),
    )
    name = ".".join(dot_segments[:first_digit])
    # Use None rather than an empty string when there is no version, the same
    # way get_jar_nv reports a missing version.
    version = ".".join(dot_segments[first_digit:]) or None
    return NameVersion(name, version)

0 commit comments

Comments
 (0)