Skip to content

Commit b6140b0

Browse files
committed
Add Pypi download URL support
Signed-off-by: Tushar Goel <[email protected]>
1 parent 3090030 commit b6140b0

File tree

4 files changed

+198
-0
lines changed

4 files changed

+198
-0
lines changed

src/fetchcode/__init__.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ def fetch_http(url, location):
4444
`url` URL string saving the content in a file at `location`
4545
"""
4646
r = requests.get(url)
47+
4748
with open(location, "wb") as f:
4849
f.write(r.content)
4950

@@ -106,3 +107,17 @@ def fetch(url):
106107
return fetchers.get(scheme)(url, location)
107108

108109
raise Exception("Not a supported/known scheme.")
110+
111+
112+
def fetch_json_response(url, timeout=30):
    """
    Fetch a JSON response from the given URL and return the parsed JSON data.

    `timeout` is the number of seconds passed to `requests.get` as its
    timeout; without one a stalled server would block the caller forever.

    Raise an Exception if the HTTP status code is not 200 or if the response
    body cannot be parsed as JSON.
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f"Failed to fetch {url}: {response.status_code} {response.reason}")

    try:
        return response.json()
    except ValueError as e:
        # Chain the original error so the parsing traceback is preserved.
        raise Exception(f"Failed to parse JSON from {url}: {str(e)}") from e

src/fetchcode/download_urls.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
# fetchcode is a free software tool from nexB Inc. and others.
2+
# Visit https://github.com/aboutcode-org/fetchcode for support and download.
3+
#
4+
# Copyright (c) nexB Inc. and others. All rights reserved.
5+
# http://nexb.com and http://aboutcode.org
6+
#
7+
# This software is licensed under the Apache License version 2.0.
8+
#
9+
# You may not use this software except in compliance with the License.
10+
# You may obtain a copy of the License at:
11+
# http://apache.org/licenses/LICENSE-2.0
12+
# Unless required by applicable law or agreed to in writing, software distributed
13+
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
14+
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations under the License.
16+
17+
from packageurl.contrib.route import NoRouteAvailable
18+
from packageurl.contrib.route import Router
19+
20+
from fetchcode.pypi import Pypi
21+
22+
# Classes able to resolve a PURL to a download URL. Each one is expected to
# expose a `purl_pattern` attribute and a `get_download_url` callable.
package_registry = [Pypi]

router = Router()

# Register one route per class: PURLs matching the class pattern are handed
# to that class' `get_download_url`.
for pkg_class in package_registry:
    router.append(
        pattern=pkg_class.purl_pattern,
        endpoint=pkg_class.get_download_url,
    )
30+
31+
32+
def download_url(purl):
    """
    Return the download URL for a `purl` Package URL string.
    Return None if `purl` is empty or if no registered route supports it.
    """
    if not purl:
        return None

    try:
        return router.process(purl)
    except NoRouteAvailable:
        # No registered pattern matches this PURL.
        return None

src/fetchcode/pypi.py

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
# fetchcode is a free software tool from nexB Inc. and others.
2+
# Visit https://github.com/aboutcode-org/fetchcode for support and download.
3+
#
4+
# Copyright (c) nexB Inc. and others. All rights reserved.
5+
# http://nexb.com and http://aboutcode.org
6+
#
7+
# This software is licensed under the Apache License version 2.0.
8+
#
9+
# You may not use this software except in compliance with the License.
10+
# You may obtain a copy of the License at:
11+
# http://apache.org/licenses/LICENSE-2.0
12+
# Unless required by applicable law or agreed to in writing, software distributed
13+
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
14+
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations under the License.
16+
17+
from urllib.parse import urljoin
18+
19+
from packageurl import PackageURL
20+
21+
from fetchcode import fetch_json_response
22+
23+
24+
class Pypi:
    """
    This class handles PyPI PURLs.
    """

    purl_pattern = "pkg:pypi/.*"
    base_url = "https://pypi.org/pypi/"

    @classmethod
    def _metadata_url(cls, name, version):
        """
        Return the JSON metadata URL under `base_url` for the `version`
        release of the `name` package.
        """
        # Local import: the module only imports `urljoin` at the top.
        from urllib.parse import quote

        # Percent-encode each segment. Unescaped, a name such as
        # "http://host" would make urljoin() drop `base_url` entirely, and
        # "/", "?" or "#" would change the requested path.
        safe_name = quote(name, safe="")
        safe_version = quote(version, safe="")
        return urljoin(cls.base_url, f"{safe_name}/{safe_version}.json")

    @classmethod
    def get_download_url(cls, purl):
        """
        Return the download URL for a Pypi PURL.

        `purl` is a PURL string parsed with `PackageURL.from_string`.
        Raise a ValueError if the PURL has no name or no version, or if the
        fetched metadata contains no usable download URL.
        """
        purl = PackageURL.from_string(purl)

        name = purl.name
        version = purl.version

        if not name or not version:
            raise ValueError("Pypi PURL must specify a name and version")

        data = fetch_json_response(cls._metadata_url(name, version))

        # A missing "urls" key defaults to one empty entry, so it is reported
        # by the "No download URL found" error below rather than the
        # empty-list error.
        download_urls = data.get("urls", [{}])

        if not download_urls:
            raise ValueError(f"No download URLs found for {name} version {version}")

        # Use the first entry that carries a non-empty "url" value.
        download_url = next(
            (entry["url"] for entry in download_urls if entry.get("url")),
            None,
        )

        if not download_url:
            raise ValueError(f"No download URL found for {name} version {version}")

        return download_url

tests/test_pypi.py

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
import unittest
2+
from unittest.mock import patch
3+
from fetchcode.pypi import Pypi
4+
5+
class TestGetDownloadURL(unittest.TestCase):
    """
    Tests for Pypi.get_download_url with the JSON fetch mocked out.
    """

    @patch("fetchcode.pypi.fetch_json_response")
    def test_valid_purl_returns_download_url(self, mock_fetch_json_response):
        mock_response = {
            "urls": [
                {
                    "url": "https://files.pythonhosted.org/packages/source/r/requests/requests-2.31.0.tar.gz"
                }
            ]
        }
        mock_fetch_json_response.return_value = mock_response

        purl = "pkg:pypi/requests@2.31.0"
        result = Pypi.get_download_url(purl)
        self.assertEqual(
            result,
            "https://files.pythonhosted.org/packages/source/r/requests/requests-2.31.0.tar.gz",
        )

    @patch("fetchcode.pypi.fetch_json_response")
    def test_missing_version_raises_value_error(self, mock_fetch_json_response):
        purl = "pkg:pypi/requests"
        with self.assertRaises(ValueError) as context:
            Pypi.get_download_url(purl)
        self.assertIn("Pypi PURL must specify a name and version", str(context.exception))

    @patch("fetchcode.pypi.fetch_json_response")
    def test_missing_name_raises_value_error(self, mock_fetch_json_response):
        purl = "pkg:pypi/@2.31.0"
        with self.assertRaises(ValueError) as context:
            Pypi.get_download_url(purl)
        self.assertIn("purl is missing the required name component", str(context.exception))

    @patch("fetchcode.pypi.fetch_json_response")
    def test_missing_urls_field_raises_value_error(self, mock_fetch_json_response):
        mock_fetch_json_response.return_value = {}
        purl = "pkg:pypi/requests@2.31.0"
        with self.assertRaises(ValueError) as context:
            Pypi.get_download_url(purl)
        self.assertIn("No download URL found", str(context.exception))

    @patch("fetchcode.pypi.fetch_json_response")
    def test_empty_urls_list_raises_value_error(self, mock_fetch_json_response):
        mock_fetch_json_response.return_value = {"urls": []}
        purl = "pkg:pypi/requests@2.31.0"
        with self.assertRaises(ValueError) as context:
            Pypi.get_download_url(purl)
        self.assertIn("No download URLs found", str(context.exception))

    @patch("fetchcode.pypi.fetch_json_response")
    def test_first_url_object_missing_url_key(self, mock_fetch_json_response):
        mock_fetch_json_response.return_value = {
            "urls": [{}]
        }
        purl = "pkg:pypi/requests@2.31.0"
        with self.assertRaises(ValueError) as context:
            Pypi.get_download_url(purl)
        self.assertIn("No download URL found", str(context.exception))

    @patch("fetchcode.pypi.fetch_json_response")
    def test_url_fallback_when_multiple_urls_provided(self, mock_fetch_json_response):
        # The first entry has no "url", so the second one must be returned.
        mock_fetch_json_response.return_value = {
            "urls": [
                {},
                {"url": "https://example.com/fallback-url.tar.gz"},
            ]
        }

        purl = "pkg:pypi/requests@2.31.0"
        download_url = Pypi.get_download_url(purl)
        self.assertEqual(download_url, "https://example.com/fallback-url.tar.gz")

    def test_malformed_purl_raises_exception(self):
        with self.assertRaises(ValueError):
            Pypi.get_download_url("this-is-not-a-valid-purl")

0 commit comments

Comments
 (0)