66# See https://aboutcode.org for more information about AboutCode FOSS projects.
77#
88
9+ from contextlib import suppress
910from pathlib import Path
1011from urllib .parse import unquote
1112from urllib .parse import urlparse
1415from django .utils .http import parse_header_parameters
1516
1617import requests
18+ from packageurl import PackageURL
19+ from packageurl .contrib import purl2url
1720
1821from dejacode_toolkit .utils import md5
1922from dejacode_toolkit .utils import sha1
2023from dejacode_toolkit .utils import sha256
2124from dejacode_toolkit .utils import sha512
2225
2326CONTENT_MAX_LENGTH = 536870912 # 512 MB
27+ DEFAULT_TIMEOUT = 5
2428
2529
2630class DataCollectionException (Exception ):
@@ -29,7 +33,7 @@ class DataCollectionException(Exception):
2933
3034def collect_package_data (url ):
3135 try :
32- response = requests .get (url , timeout = 5 , stream = True )
36+ response = requests .get (url , timeout = DEFAULT_TIMEOUT , stream = True )
3337 except (TimeoutError , requests .RequestException ) as e :
3438 raise DataCollectionException (e )
3539
@@ -73,3 +77,94 @@ def collect_package_data(url):
7377 }
7478
7579 return package_data
80+
81+
class PyPIFetcher:
    """
    Handle PyPI Package URL (PURL) resolution and download URL retrieval.

    Every method is a static or class method; no instance state is used.

    Adapted from fetchcode
    https://github.com/aboutcode-org/fetchcode/issues/190
    """

    purl_pattern = "pkg:pypi/.*"
    base_url = "https://pypi.org/pypi"

    @staticmethod
    def fetch_json_response(url):
        """
        Fetch a JSON response from the given URL and return the parsed JSON data.

        Raise a ``DataCollectionException`` when the response status is not 200
        or when the body cannot be parsed as JSON. Errors raised by
        ``requests.get`` itself propagate unchanged.
        """
        response = requests.get(url, timeout=DEFAULT_TIMEOUT)
        if response.status_code != 200:
            raise DataCollectionException(
                f"Failed to fetch {url}: {response.status_code} {response.reason}"
            )

        try:
            return response.json()
        except ValueError as e:
            # Chain the parsing error so the root cause stays visible in tracebacks.
            raise DataCollectionException(f"Failed to parse JSON from {url}: {e}") from e

    @classmethod
    def get_package_data(cls, purl):
        """
        Fetch package data from PyPI API.

        ``purl`` may be a purl string or an already parsed ``PackageURL``.
        The version-specific endpoint is used when the purl carries a version,
        the package-level endpoint otherwise.
        """
        parsed_purl = purl if isinstance(purl, PackageURL) else PackageURL.from_string(purl)

        if parsed_purl.version:
            api_url = f"{cls.base_url}/{parsed_purl.name}/{parsed_purl.version}/json"
        else:
            api_url = f"{cls.base_url}/{parsed_purl.name}/json"

        return cls.fetch_json_response(api_url)

    @classmethod
    def get_urls_info(cls, purl):
        """
        Collect URL info dicts from PyPI API.

        Always return a list: a missing or null ``urls`` entry, or a payload
        that is not a mapping, yields an empty list.
        """
        data = cls.get_package_data(purl)
        if not isinstance(data, dict):
            return []
        # ``or []`` also covers an explicit ``"urls": null`` in the payload.
        return data.get("urls") or []

    @classmethod
    def get_download_url(cls, purl, preferred_type="sdist"):
        """
        Get a single download URL from PyPI API.
        If no version is specified in the PURL, fetches the latest version.

        Return the URL of the first entry whose ``packagetype`` equals
        ``preferred_type``, else the URL of the first entry that has one,
        else ``None``. Entries without a usable ``url`` are ignored.
        """
        candidates = [info for info in cls.get_urls_info(purl) if info.get("url")]
        if not candidates:
            return None

        for url_info in candidates:
            if url_info.get("packagetype") == preferred_type:
                return url_info["url"]

        # No entry of the preferred type: fall back to the first usable one.
        return candidates[0]["url"]

    @classmethod
    def get_all_download_urls(cls, purl):
        """
        Get all download URLs from PyPI API.
        If no version is specified in the PURL, fetches the latest version.

        Entries without a usable ``url`` are skipped.
        """
        return [info["url"] for info in cls.get_urls_info(purl) if info.get("url")]
149+
def infer_download_url(purl):
    """
    Return the download URL inferred from a Package URL, or None.

    ``purl`` may be a purl string or a ``PackageURL`` instance. ``purl2url`` is
    tried first; when it yields nothing, a package-type-specific resolver
    (which may make HTTP requests) is used as a fallback.
    """
    already_parsed = isinstance(purl, PackageURL)
    package_url = purl if already_parsed else PackageURL.from_string(purl)
    purl_string = str(purl) if already_parsed else purl

    resolved_url = purl2url.get_download_url(purl_string)
    if resolved_url:
        return resolved_url

    # Only PyPI has a fallback resolver: ``purl2url`` does not support it and
    # resolving its download URLs requires an API call.
    if package_url.type != "pypi":
        return None

    # Best effort: any failure of the fallback lookup results in None.
    with suppress(Exception):
        return PyPIFetcher.get_download_url(purl_string, preferred_type="sdist")
    return None
0 commit comments