Skip to content

Commit 7ee7232

Browse files
authored
Add maven source downloadable format (#234)
Signed-off-by: 석지영/책임연구원/SW공학(연)Open Source TP <[email protected]>
1 parent 2f8284e commit 7ee7232

File tree

2 files changed

+224
-68
lines changed

2 files changed

+224
-68
lines changed

src/fosslight_util/_get_downloadable_url.py

Lines changed: 222 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -54,76 +54,185 @@ def extract_name_version_from_link(link, checkout_version):
5454
oss_name = ""
5555
oss_version = ""
5656
matched = False
57+
direct_maven = False
58+
5759
if link.startswith("www."):
5860
link = link.replace("www.", "https://www.", 1)
59-
for key, value in constant.PKG_PATTERN.items():
60-
p = re.compile(value)
61-
match = p.match(link)
62-
if match:
63-
try:
64-
origin_name = match.group(1)
65-
if (key == "pypi") or (key == "pypi2"):
66-
oss_name = f"pypi:{origin_name}"
67-
oss_name = re.sub(r"[-_.]+", "-", oss_name)
68-
oss_version = match.group(2)
69-
elif key == "maven":
70-
artifact = match.group(2)
71-
oss_name = f"{origin_name}:{artifact}"
72-
origin_name = oss_name
73-
oss_version = match.group(3)
74-
elif key == "npm" or key == "npm2":
75-
oss_name = f"npm:{origin_name}"
76-
oss_version = match.group(2)
77-
elif key == "pub":
78-
oss_name = f"pub:{origin_name}"
79-
oss_version = match.group(2)
80-
elif key == "cocoapods":
81-
oss_name = f"cocoapods:{origin_name}"
82-
elif key == "go":
83-
if origin_name.endswith('/'):
84-
origin_name = origin_name[:-1]
85-
oss_name = f"go:{origin_name}"
86-
oss_version = match.group(2)
87-
elif key == "cargo":
88-
oss_name = f"cargo:{origin_name}"
89-
oss_version = match.group(2)
90-
except Exception as ex:
91-
logger.info(f"extract_name_version_from_link {key}:{ex}")
92-
if oss_name:
93-
# Priority: 1) detected oss_version 2) checkout_version 3) latest
94-
need_latest = False
95-
96-
if not oss_version and checkout_version:
97-
oss_version = checkout_version.strip()
98-
if key in ["pypi", "maven", "npm", "npm2", "pub", "go"]:
99-
if oss_version:
100-
try:
101-
if not version_exists(key, origin_name, oss_version):
102-
logger.info(f'Version {oss_version} not found for {oss_name}; will attempt latest fallback')
103-
need_latest = True
104-
except Exception as e:
105-
logger.info(f'Version validation failed ({oss_name}:{oss_version}) {e}; will attempt latest fallback')
106-
need_latest = True
107-
else:
108-
need_latest = True
109-
if need_latest:
110-
latest_ver = get_latest_package_version(link, key, origin_name)
111-
if latest_ver:
112-
if oss_version and latest_ver != oss_version:
113-
logger.info(f'Fallback to latest version {latest_ver} (previous invalid: {oss_version})')
114-
elif not oss_version:
115-
logger.info(f'Using latest version {latest_ver} (no version detected)')
116-
oss_version = latest_ver
117-
if oss_version:
118-
try:
119-
link = get_new_link_with_version(link, key, origin_name, oss_version)
120-
except Exception as _e:
121-
logger.info(f'Failed to build versioned link for {oss_name}:{oss_version} {_e}')
61+
62+
if (not matched and (
63+
link.startswith('https://repo1.maven.org/maven2/') or
64+
link.startswith('https://dl.google.com/android/maven2/')
65+
)):
66+
parsed = parse_direct_maven_url(link)
67+
if parsed:
68+
origin_name, parsed_version = parsed
69+
oss_name = origin_name # groupId:artifactId
70+
oss_version = parsed_version or ""
12271
matched = True
123-
break
72+
direct_maven = True
73+
pkg_type = 'maven'
74+
75+
for direct_key in ["maven_repo1", "maven_google"]:
76+
pattern = constant.PKG_PATTERN.get(direct_key)
77+
if pattern and re.match(pattern, link):
78+
parsed = parse_direct_maven_url(link)
79+
if parsed:
80+
origin_name, parsed_version = parsed
81+
oss_name = origin_name
82+
oss_version = parsed_version or ""
83+
matched = True
84+
direct_maven = True
85+
pkg_type = 'maven'
86+
break
87+
12488
if not matched:
125-
key = ""
126-
return oss_name, oss_version, link, key
89+
for key, value in constant.PKG_PATTERN.items():
90+
if key in ["maven_repo1", "maven_google"]:
91+
continue
92+
p = re.compile(value)
93+
match = p.match(link)
94+
if match:
95+
try:
96+
pkg_type = key
97+
origin_name = match.group(1)
98+
if (key == "pypi") or (key == "pypi2"):
99+
oss_name = f"pypi:{origin_name}"
100+
oss_name = re.sub(r"[-_.]+", "-", oss_name)
101+
oss_version = match.group(2)
102+
pkg_type = 'pypi'
103+
elif key == "maven":
104+
artifact = match.group(2)
105+
oss_name = f"{origin_name}:{artifact}"
106+
origin_name = oss_name
107+
oss_version = match.group(3)
108+
elif key == "npm" or key == "npm2":
109+
oss_name = f"npm:{origin_name}"
110+
oss_version = match.group(2)
111+
elif key == "pub":
112+
oss_name = f"pub:{origin_name}"
113+
oss_version = match.group(2)
114+
elif key == "cocoapods":
115+
oss_name = f"cocoapods:{origin_name}"
116+
elif key == "go":
117+
if origin_name.endswith('/'):
118+
origin_name = origin_name[:-1]
119+
oss_name = f"go:{origin_name}"
120+
oss_version = match.group(2)
121+
elif key == "cargo":
122+
oss_name = f"cargo:{origin_name}"
123+
oss_version = match.group(2)
124+
except Exception as ex:
125+
logger.info(f"extract_name_version_from_link {key}:{ex}")
126+
if oss_name:
127+
matched = True
128+
break
129+
130+
if not matched:
131+
return "", "", link, ""
132+
else:
133+
need_latest = False
134+
if not oss_version and checkout_version:
135+
oss_version = checkout_version.strip()
136+
if pkg_type in ["pypi", "maven", "npm", "npm2", "pub", "go"]:
137+
if oss_version:
138+
try:
139+
if not version_exists(pkg_type, origin_name, oss_version):
140+
logger.info(f'Version {oss_version} not found for {oss_name}; will attempt latest fallback')
141+
need_latest = True
142+
except Exception as e:
143+
logger.info(f'Version validation failed ({oss_name}:{oss_version}) {e}; will attempt latest fallback')
144+
need_latest = True
145+
else:
146+
need_latest = True
147+
if need_latest:
148+
latest_ver = get_latest_package_version(link, pkg_type, origin_name)
149+
if latest_ver:
150+
if oss_version and latest_ver != oss_version:
151+
logger.info(f'Fallback to latest version {latest_ver} (previous invalid: {oss_version})')
152+
elif not oss_version:
153+
logger.info(f'Using latest version {latest_ver} (no version detected)')
154+
oss_version = latest_ver
155+
156+
try:
157+
if oss_version:
158+
if pkg_type == 'maven' and direct_maven:
159+
# Skip if oss_name malformed
160+
if ':' in oss_name:
161+
parts = oss_name.split(':', 1)
162+
group_id, artifact_id = parts[0], parts[1]
163+
group_path = group_id.replace('.', '/')
164+
if (
165+
link.startswith('https://repo1.maven.org/maven2/') or
166+
link.startswith('http://repo1.maven.org/maven2/')
167+
):
168+
if not re.search(r'/\d[^/]*/*$', link.rstrip('/')):
169+
link = (
170+
f'https://repo1.maven.org/maven2/{group_path}/'
171+
f'{artifact_id}/{oss_version}'
172+
)
173+
elif (
174+
link.startswith('https://dl.google.com/android/maven2/') or
175+
link.startswith('http://dl.google.com/android/maven2/')
176+
):
177+
if not re.search(r'/\d[^/]*/*$', link.rstrip('/')):
178+
link = (
179+
f'https://dl.google.com/android/maven2/{group_path}/'
180+
f'{artifact_id}/{oss_version}/{artifact_id}-{oss_version}-sources.jar'
181+
)
182+
else:
183+
logger.debug(f'Skip maven normalization due to invalid oss_name: {oss_name}')
184+
else:
185+
link = get_new_link_with_version(link, pkg_type, origin_name, oss_version)
186+
except Exception as _e:
187+
logger.info(f'Failed to build versioned link for {oss_name or origin_name}:{oss_version} {_e}')
188+
189+
return oss_name, oss_version, link, pkg_type
190+
191+
192+
def parse_direct_maven_url(url):
    """Extract Maven coordinates from a direct repository URL.

    Recognises URLs under ``repo1.maven.org/maven2/`` and
    ``dl.google.com/android/maven2/`` (with or without a leading
    ``http://`` / ``https://``) laid out as
    ``<group path>/<artifactId>[/<version>[/<file>]]``.

    :param url: direct repository URL to parse.
    :return: ``("groupId:artifactId", version)``; ``version`` is ``None`` when
        the URL has no version segment. ``None`` is returned when the URL does
        not belong to a recognised repository or cannot be parsed.
    """
    repo_prefixes = (
        'repo1.maven.org/maven2/',
        'dl.google.com/android/maven2/',
    )
    try:
        # Remove only a leading scheme; the rest of the URL stays untouched.
        clean_url = re.sub(r'^https?://', '', url)
        for prefix in repo_prefixes:
            if clean_url.startswith(prefix):
                base_path = clean_url[len(prefix):]
                break
        else:
            return None

        parts = base_path.rstrip('/').split('/')

        # Strip file name if ends with known artifact extension, but remember
        # it: "<artifactId>-<version>..." confirms which segment is the version.
        file_name = None
        if parts[-1].endswith(('.jar', '.pom', '.aar')):
            file_name = parts.pop()

        if len(parts) < 2:
            return None

        # Default: no version segment, the last segment is the artifactId.
        version = None
        artifact_id = parts[-1]
        group_parts = parts[:-1]

        if len(parts) >= 3:
            candidate = parts[-1]
            confirmed_by_file = bool(
                candidate
                and file_name is not None
                and file_name.startswith(f'{parts[-2]}-{candidate}')
            )
            # A version must START with a digit (unless the file name confirms
            # it). Merely containing a digit is not enough: artifactIds such as
            # "commons-lang3" or "slf4j-api" would otherwise be taken for
            # versions on artifact-level URLs.
            if confirmed_by_file or re.match(r'\d', candidate):
                version = candidate
                artifact_id = parts[-2]
                group_parts = parts[:-2]

        group_id = '.'.join(group_parts)
        if not group_id or not artifact_id:
            return None

        maven_name = f"{group_id}:{artifact_id}"
        return maven_name, version
    except Exception as e:
        # Best-effort parser: any failure means "not a direct Maven URL".
        logger.debug(f'Failed to parse direct Maven URL {url}: {e}')
        return None
127236

128237

129238
def get_new_link_with_version(link, pkg_type, oss_name, oss_version):
@@ -160,7 +269,45 @@ def get_latest_package_version(link, pkg_type, oss_name):
160269
if maven_response.status_code == 200:
161270
versions = maven_response.json().get('versions', [])
162271
if versions:
163-
cand = max(versions, key=lambda v: v.get('publishedAt', ''))
272+
# Some version entries may miss publishedAt; fallback to semantic version ordering.
273+
def sem_key(vstr: str):
    """Build a sortable key for a version string.

    Accepts ``MAJOR[.MINOR[.PATCH]]`` followed by an optional qualifier
    introduced by ``-`` or ``.`` (e.g. ``1.9.0``, ``1.10.0-alpha``,
    ``2.0.0-rc1``, ``4.1.100.Final``).

    :param vstr: version string to parse.
    :return: ``(major, minor, patch, weight)``. ``weight`` is ``0`` for a
        stable release and negative for a pre-release or unrecognised
        qualifier. Strings that do not match the pattern yield
        ``(0, 0, 0, 999)``.
    """
    m = re.match(r'^(\d+)(?:\.(\d+))?(?:\.(\d+))?(?:[-.]([A-Za-z0-9]+))?$', vstr)
    if not m:
        return (0, 0, 0, 999)
    major, minor, patch = (int(num or 0) for num in m.group(1, 2, 3))

    # Drop a trailing counter so "rc1", "beta2", "alpha01" and "M1" resolve to
    # their base qualifier. A purely numeric qualifier (the "4" in "1.2.3.4")
    # collapses to '' and therefore counts as a stable release.
    qualifier = (m.group(4) or '').lower().rstrip('0123456789')

    # Qualifier order: stable > snapshot > rc > milestone > beta > alpha.
    # "final", "ga" and "release" are treated as the plain release.
    label_weight_map = {
        '': 0,
        'final': 0,
        'ga': 0,
        'release': 0,
        'snapshot': -1,
        'rc': -2,
        'cr': -2,
        'milestone': -3,
        'm': -3,
        'beta': -4,
        'b': -4,
        'alpha': -5,
        'a': -5,
    }
    # Unrecognised qualifiers rank below every known pre-release.
    weight = label_weight_map.get(qualifier, -6)
    return (major, minor, patch, weight)
291+
292+
with_pub = [v for v in versions if v.get('publishedAt')]
293+
if with_pub:
294+
cand = max(with_pub, key=lambda v: v.get('publishedAt'))
295+
else:
296+
decorated = []
297+
for v in versions:
298+
vkey = v.get('versionKey', {})
299+
ver = vkey.get('version', '')
300+
if ver:
301+
decorated.append((sem_key(ver), ver, v))
302+
if decorated:
303+
decorated.sort(key=lambda t: t[0])
304+
stable_candidates = [t for t in decorated if t[0][3] == 0]
305+
if stable_candidates:
306+
cand = stable_candidates[-1][2]
307+
else:
308+
cand = decorated[-1][2]
309+
else:
310+
cand = versions[-1]
164311
find_version = cand.get('versionKey', {}).get('version', '')
165312
elif pkg_type == 'pub':
166313
pub_response = requests.get(f'https://pub.dev/api/packages/{oss_name}')
@@ -188,7 +335,7 @@ def get_downloadable_url(link, checkout_version):
188335

189336
if pkg_type == "pypi":
190337
ret, result_link = get_download_location_for_pypi(new_link)
191-
elif pkg_type == "maven" or new_link.startswith('repo1.maven.org/'):
338+
elif pkg_type == "maven" or new_link.startswith('repo1.maven.org/') or new_link.startswith('dl.google.com/android/maven2/'):
192339
ret, result_link = get_download_location_for_maven(new_link)
193340
elif (pkg_type in ["npm", "npm2"]) or new_link.startswith('registry.npmjs.org/'):
194341
ret, result_link = get_download_location_for_npm(new_link)
@@ -365,6 +512,13 @@ def get_download_location_for_maven(link):
365512
return ret, new_link
366513
else:
367514
dn_loc = 'https://' + link
515+
elif link.startswith('dl.google.com/android/maven2/'):
516+
if link.endswith('.jar'):
517+
new_link = 'https://' + link
518+
ret = True
519+
return ret, new_link
520+
else:
521+
dn_loc = 'https://' + link
368522
else:
369523
raise Exception("not valid url for maven")
370524

src/fosslight_util/constant.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@
4040
"pypi": r'https?:\/\/pypi\.org\/project\/([^\/]+)[\/]?([^\/]*)',
4141
"pypi2": r'https?:\/\/files\.pythonhosted\.org\/packages\/source\/[\w]\/([^\/]+)\/[\S]+-([^\-]+)\.tar\.gz',
4242
"maven": r'https?:\/\/mvnrepository\.com\/artifact\/([^\/]+)\/([^\/]+)\/?([^\/]*)',
43+
"maven_repo1": r'https?:\/\/repo1\.maven\.org\/maven2\/(.*)',
44+
"maven_google": r'https?:\/\/dl\.google\.com\/android\/maven2\/(.*)',
4345
"npm": r'https?:\/\/www\.npmjs\.com\/package\/([^\/\@]+)(?:\/v\/)?([^\/]*)',
4446
"npm2": r'https?:\/\/www\.npmjs\.com\/package\/(\@[^\/]+\/[^\/]+)(?:\/v\/)?([^\/]*)',
4547
"pub": r'https?:\/\/pub\.dev\/packages\/([^\/]+)(?:\/versions\/)?([^\/]*)',

0 commit comments

Comments
 (0)