Skip to content

Commit aaa5dfc

Browse files
committed
Add maven source downloadable format
Signed-off-by: 석지영/책임연구원/SW공학(연)Open Source TP <[email protected]>
1 parent 2f8284e commit aaa5dfc

File tree

2 files changed

+211
-68
lines changed

2 files changed

+211
-68
lines changed

src/fosslight_util/_get_downloadable_url.py

Lines changed: 209 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -54,76 +54,170 @@ def extract_name_version_from_link(link, checkout_version):
5454
oss_name = ""
5555
oss_version = ""
5656
matched = False
57+
direct_maven = False
58+
5759
if link.startswith("www."):
5860
link = link.replace("www.", "https://www.", 1)
59-
for key, value in constant.PKG_PATTERN.items():
60-
p = re.compile(value)
61-
match = p.match(link)
62-
if match:
63-
try:
64-
origin_name = match.group(1)
65-
if (key == "pypi") or (key == "pypi2"):
66-
oss_name = f"pypi:{origin_name}"
67-
oss_name = re.sub(r"[-_.]+", "-", oss_name)
68-
oss_version = match.group(2)
69-
elif key == "maven":
70-
artifact = match.group(2)
71-
oss_name = f"{origin_name}:{artifact}"
72-
origin_name = oss_name
73-
oss_version = match.group(3)
74-
elif key == "npm" or key == "npm2":
75-
oss_name = f"npm:{origin_name}"
76-
oss_version = match.group(2)
77-
elif key == "pub":
78-
oss_name = f"pub:{origin_name}"
79-
oss_version = match.group(2)
80-
elif key == "cocoapods":
81-
oss_name = f"cocoapods:{origin_name}"
82-
elif key == "go":
83-
if origin_name.endswith('/'):
84-
origin_name = origin_name[:-1]
85-
oss_name = f"go:{origin_name}"
86-
oss_version = match.group(2)
87-
elif key == "cargo":
88-
oss_name = f"cargo:{origin_name}"
89-
oss_version = match.group(2)
90-
except Exception as ex:
91-
logger.info(f"extract_name_version_from_link {key}:{ex}")
92-
if oss_name:
93-
# Priority: 1) detected oss_version 2) checkout_version 3) latest
94-
need_latest = False
95-
96-
if not oss_version and checkout_version:
97-
oss_version = checkout_version.strip()
98-
if key in ["pypi", "maven", "npm", "npm2", "pub", "go"]:
99-
if oss_version:
100-
try:
101-
if not version_exists(key, origin_name, oss_version):
102-
logger.info(f'Version {oss_version} not found for {oss_name}; will attempt latest fallback')
103-
need_latest = True
104-
except Exception as e:
105-
logger.info(f'Version validation failed ({oss_name}:{oss_version}) {e}; will attempt latest fallback')
106-
need_latest = True
107-
else:
108-
need_latest = True
109-
if need_latest:
110-
latest_ver = get_latest_package_version(link, key, origin_name)
111-
if latest_ver:
112-
if oss_version and latest_ver != oss_version:
113-
logger.info(f'Fallback to latest version {latest_ver} (previous invalid: {oss_version})')
114-
elif not oss_version:
115-
logger.info(f'Using latest version {latest_ver} (no version detected)')
116-
oss_version = latest_ver
117-
if oss_version:
118-
try:
119-
link = get_new_link_with_version(link, key, origin_name, oss_version)
120-
except Exception as _e:
121-
logger.info(f'Failed to build versioned link for {oss_name}:{oss_version} {_e}')
61+
62+
if not matched and (link.startswith('https://repo1.maven.org/maven2/') or link.startswith('https://dl.google.com/android/maven2/')):
63+
parsed = parse_direct_maven_url(link)
64+
if parsed:
65+
origin_name, parsed_version = parsed
66+
oss_name = origin_name # groupId:artifactId
67+
oss_version = parsed_version or ""
12268
matched = True
123-
break
69+
direct_maven = True
70+
pkg_type = 'maven'
71+
72+
for direct_key in ["maven_repo1", "maven_google"]:
73+
pattern = constant.PKG_PATTERN.get(direct_key)
74+
if pattern and re.match(pattern, link):
75+
parsed = parse_direct_maven_url(link)
76+
if parsed:
77+
origin_name, parsed_version = parsed
78+
oss_name = origin_name
79+
oss_version = parsed_version or ""
80+
matched = True
81+
direct_maven = True
82+
pkg_type = 'maven'
83+
break
84+
12485
if not matched:
125-
key = ""
126-
return oss_name, oss_version, link, key
86+
for key, value in constant.PKG_PATTERN.items():
87+
if key in ["maven_repo1", "maven_google"]:
88+
continue
89+
p = re.compile(value)
90+
match = p.match(link)
91+
if match:
92+
try:
93+
pkg_type = key
94+
origin_name = match.group(1)
95+
if (key == "pypi") or (key == "pypi2"):
96+
oss_name = f"pypi:{origin_name}"
97+
oss_name = re.sub(r"[-_.]+", "-", oss_name)
98+
oss_version = match.group(2)
99+
pkg_type = 'pypi'
100+
elif key == "maven":
101+
artifact = match.group(2)
102+
oss_name = f"{origin_name}:{artifact}"
103+
origin_name = oss_name
104+
oss_version = match.group(3)
105+
elif key == "npm" or key == "npm2":
106+
oss_name = f"npm:{origin_name}"
107+
oss_version = match.group(2)
108+
elif key == "pub":
109+
oss_name = f"pub:{origin_name}"
110+
oss_version = match.group(2)
111+
elif key == "cocoapods":
112+
oss_name = f"cocoapods:{origin_name}"
113+
elif key == "go":
114+
if origin_name.endswith('/'):
115+
origin_name = origin_name[:-1]
116+
oss_name = f"go:{origin_name}"
117+
oss_version = match.group(2)
118+
elif key == "cargo":
119+
oss_name = f"cargo:{origin_name}"
120+
oss_version = match.group(2)
121+
except Exception as ex:
122+
logger.info(f"extract_name_version_from_link {key}:{ex}")
123+
if oss_name:
124+
matched = True
125+
break
126+
127+
if not matched:
128+
return "", "", link, ""
129+
else:
130+
need_latest = False
131+
if not oss_version and checkout_version:
132+
oss_version = checkout_version.strip()
133+
if pkg_type in ["pypi", "maven", "npm", "npm2", "pub", "go"]:
134+
if oss_version:
135+
try:
136+
if not version_exists(pkg_type, origin_name, oss_version):
137+
logger.info(f'Version {oss_version} not found for {oss_name}; will attempt latest fallback')
138+
need_latest = True
139+
except Exception as e:
140+
logger.info(f'Version validation failed ({oss_name}:{oss_version}) {e}; will attempt latest fallback')
141+
need_latest = True
142+
else:
143+
need_latest = True
144+
if need_latest:
145+
latest_ver = get_latest_package_version(link, pkg_type, origin_name)
146+
if latest_ver:
147+
if oss_version and latest_ver != oss_version:
148+
logger.info(f'Fallback to latest version {latest_ver} (previous invalid: {oss_version})')
149+
elif not oss_version:
150+
logger.info(f'Using latest version {latest_ver} (no version detected)')
151+
oss_version = latest_ver
152+
153+
try:
154+
if oss_version:
155+
if pkg_type == 'maven' and direct_maven:
156+
# Skip if oss_name malformed
157+
if ':' in oss_name:
158+
parts = oss_name.split(':', 1)
159+
group_id, artifact_id = parts[0], parts[1]
160+
group_path = group_id.replace('.', '/')
161+
if link.startswith('https://repo1.maven.org/maven2/') or link.startswith('http://repo1.maven.org/maven2/'):
162+
if not re.search(r'/\d[^/]*/*$', link.rstrip('/')):
163+
link = f'https://repo1.maven.org/maven2/{group_path}/{artifact_id}/{oss_version}'
164+
elif link.startswith('https://dl.google.com/android/maven2/') or link.startswith('http://dl.google.com/android/maven2/'):
165+
if not re.search(r'/\d[^/]*/*$', link.rstrip('/')):
166+
link = f'https://dl.google.com/android/maven2/{group_path}/{artifact_id}/{oss_version}/{artifact_id}-{oss_version}-sources.jar'
167+
else:
168+
logger.debug(f'Skip maven normalization due to invalid oss_name: {oss_name}')
169+
else:
170+
link = get_new_link_with_version(link, pkg_type, origin_name, oss_version)
171+
except Exception as _e:
172+
logger.info(f'Failed to build versioned link for {oss_name or origin_name}:{oss_version} {_e}')
173+
174+
return oss_name, oss_version, link, pkg_type
175+
176+
177+
def parse_direct_maven_url(url):
    """Extract Maven coordinates from a direct repository URL.

    Supports links below ``repo1.maven.org/maven2/`` and
    ``dl.google.com/android/maven2/``, with or without an ``http(s)://``
    scheme. The path is read as ``<group path>/<artifactId>[/<version>[/<file>]]``.

    Args:
        url: Link to a group/artifact directory, a version directory, or an
            artifact file (``.jar``, ``.pom`` or ``.aar``).

    Returns:
        A tuple ``(maven_name, version)`` where ``maven_name`` is
        ``"groupId:artifactId"`` and ``version`` is a string, or ``None`` when
        the URL carries no version. Returns ``None`` (not a tuple) when the
        URL does not belong to a supported repository or is too short to hold
        both a group and an artifact.
    """
    try:
        # Strip the scheme only where it is a prefix of the URL.
        clean_url = re.sub(r'^https?://', '', url)
        for repo_prefix in ('repo1.maven.org/maven2/', 'dl.google.com/android/maven2/'):
            if clean_url.startswith(repo_prefix):
                base_path = clean_url[len(repo_prefix):]
                break
        else:
            return None

        base_path = base_path.rstrip('/')
        # Strip file name if ends with known artifact extension, but remember
        # it: "<artifactId>-<version>..." helps to confirm the version below.
        file_name = ''
        if base_path.endswith(('.jar', '.pom', '.aar')):
            base_path, _, file_name = base_path.rpartition('/')

        # Ignore empty segments produced by doubled slashes.
        parts = [segment for segment in base_path.split('/') if segment]
        if len(parts) < 2:
            return None

        version = None
        artifact_id = parts[-1]
        group_parts = parts[:-1]
        if len(parts) >= 3:
            candidate = parts[-1]
            # A version segment must START with a digit. Artifact ids such as
            # "commons-lang3" or "slf4j-api" merely contain one and must not
            # be mistaken for a version. A version that does not start with a
            # digit (e.g. "r09") is still accepted when the stripped file name
            # confirms it.
            starts_with_digit = re.match(r'[0-9]', candidate) is not None
            confirmed_by_file = bool(file_name) and file_name.startswith(f'{parts[-2]}-{candidate}')
            if starts_with_digit or confirmed_by_file:
                version = candidate
                artifact_id = parts[-2]
                group_parts = parts[:-2]

        group_id = '.'.join(group_parts)
        if not group_id or not artifact_id:
            return None

        return f"{group_id}:{artifact_id}", version
    except (AttributeError, TypeError) as e:
        # Best effort: a non-string url is reported as "not parseable".
        logger.debug(f'Failed to parse direct Maven URL {url}: {e}')
        return None
127221

128222

129223
def get_new_link_with_version(link, pkg_type, oss_name, oss_version):
@@ -160,7 +254,47 @@ def get_latest_package_version(link, pkg_type, oss_name):
160254
if maven_response.status_code == 200:
161255
versions = maven_response.json().get('versions', [])
162256
if versions:
163-
cand = max(versions, key=lambda v: v.get('publishedAt', ''))
257+
# Some version entries may miss publishedAt; fallback to semantic version ordering.
258+
def sem_key(vstr: str):
259+
# Parse semantic version with optional prerelease label
260+
# Examples: 1.9.0, 1.10.0-alpha, 2.0.0-rc
261+
m = re.match(r'^(\d+)(?:\.(\d+))?(?:\.(\d+))?(?:[-.]([A-Za-z0-9]+))?$', vstr)
262+
if not m:
263+
return (0, 0, 0, 999)
264+
major = int(m.group(1) or 0)
265+
minor = int(m.group(2) or 0)
266+
patch = int(m.group(3) or 0)
267+
label = (m.group(4) or '').lower()
268+
# Assign label weights: stable > rc > beta > alpha
269+
label_weight_map = {
270+
'alpha': -3,
271+
'beta': -2,
272+
'rc': -1
273+
}
274+
weight = label_weight_map.get(label, 0 if label == '' else -4)
275+
return (major, minor, patch, weight)
276+
277+
with_pub = [v for v in versions if v.get('publishedAt')]
278+
if with_pub:
279+
cand = max(with_pub, key=lambda v: v.get('publishedAt'))
280+
else:
281+
decorated = []
282+
for v in versions:
283+
vkey = v.get('versionKey', {})
284+
ver = vkey.get('version', '')
285+
if ver:
286+
decorated.append((sem_key(ver), ver, v))
287+
if decorated:
288+
# Sort ascending by sem_key; the last entry is the highest version (for equal major.minor.patch a stable release sorts after its pre-releases because its weight, 0, is the largest)
289+
decorated.sort(key=lambda t: t[0])
290+
# Keep only stable releases: the fourth element of the sem_key tuple (major, minor, patch, weight) is 0 when the version has no pre-release label
291+
stable_candidates = [t for t in decorated if t[0][3] == 0]
292+
if stable_candidates:
293+
cand = stable_candidates[-1][2]
294+
else:
295+
cand = decorated[-1][2]
296+
else:
297+
cand = versions[-1]
164298
find_version = cand.get('versionKey', {}).get('version', '')
165299
elif pkg_type == 'pub':
166300
pub_response = requests.get(f'https://pub.dev/api/packages/{oss_name}')
@@ -188,7 +322,7 @@ def get_downloadable_url(link, checkout_version):
188322

189323
if pkg_type == "pypi":
190324
ret, result_link = get_download_location_for_pypi(new_link)
191-
elif pkg_type == "maven" or new_link.startswith('repo1.maven.org/'):
325+
elif pkg_type == "maven" or new_link.startswith('repo1.maven.org/') or new_link.startswith('dl.google.com/android/maven2/'):
192326
ret, result_link = get_download_location_for_maven(new_link)
193327
elif (pkg_type in ["npm", "npm2"]) or new_link.startswith('registry.npmjs.org/'):
194328
ret, result_link = get_download_location_for_npm(new_link)
@@ -365,6 +499,13 @@ def get_download_location_for_maven(link):
365499
return ret, new_link
366500
else:
367501
dn_loc = 'https://' + link
502+
elif link.startswith('dl.google.com/android/maven2/'):
503+
if link.endswith('.jar'):
504+
new_link = 'https://' + link
505+
ret = True
506+
return ret, new_link
507+
else:
508+
dn_loc = 'https://' + link
368509
else:
369510
raise Exception("not valid url for maven")
370511

src/fosslight_util/constant.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@
4040
"pypi": r'https?:\/\/pypi\.org\/project\/([^\/]+)[\/]?([^\/]*)',
4141
"pypi2": r'https?:\/\/files\.pythonhosted\.org\/packages\/source\/[\w]\/([^\/]+)\/[\S]+-([^\-]+)\.tar\.gz',
4242
"maven": r'https?:\/\/mvnrepository\.com\/artifact\/([^\/]+)\/([^\/]+)\/?([^\/]*)',
43+
"maven_repo1": r'https?:\/\/repo1\.maven\.org\/maven2\/(.*)',
44+
"maven_google": r'https?:\/\/dl\.google\.com\/android\/maven2\/(.*)',
4345
"npm": r'https?:\/\/www\.npmjs\.com\/package\/([^\/\@]+)(?:\/v\/)?([^\/]*)',
4446
"npm2": r'https?:\/\/www\.npmjs\.com\/package\/(\@[^\/]+\/[^\/]+)(?:\/v\/)?([^\/]*)',
4547
"pub": r'https?:\/\/pub\.dev\/packages\/([^\/]+)(?:\/versions\/)?([^\/]*)',

0 commit comments

Comments
 (0)