Skip to content

Commit 6d02afb

Browse files
committed
fix: Sanitize URLs to private repositories to not contain credentials
URLs to private repositories usually contain credentials. Signed-off-by: Nicolas Nobelis <nicolas.nobelis@bosch.com>
1 parent c7b6bd0 commit 6d02afb

File tree

1 file changed

+18
-2
lines changed

1 file changed

+18
-2
lines changed

src/python_inspector/package_data.py

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,22 @@ async def get_pypi_data_from_purl(
103103
valid_distribution_urls.insert(0, wheel_url)
104104

105105
urls = {url.get("url"): url for url in response.get("urls") or []}
106+
107+
def remove_credentials_from_url(url: str):
    """
    Return ``url`` with any ``user:password@`` userinfo removed from its
    network location.

    Only the userinfo is dropped; scheme, host, port, path, parameters,
    query and fragment are kept as written. A URL that carries no userinfo
    (including URLs without a network location such as ``file:///...`` or
    relative paths) is returned unchanged, as is an empty or ``None`` value.
    """
    if not url:
        # Nothing to sanitize; hand back the empty/None value as-is.
        return url

    # Parse the URL into its components
    parsed = urlparse(url)

    # The userinfo is everything before the LAST "@" of the netloc (a
    # password may itself contain "@"). Slicing the netloc instead of
    # rebuilding it from ``hostname``/``port`` keeps the brackets around
    # IPv6 literals, keeps an explicit port exactly as written, and avoids
    # ``hostname`` being None or ``port`` raising ValueError.
    _userinfo, separator, host_and_port = parsed.netloc.rpartition("@")
    if not separator:
        # No credentials embedded in the URL.
        return url

    # Create a new parsed result object, replacing the old netloc
    # with our new one that has no credentials.
    parsed = parsed._replace(netloc=host_and_port)
    return urlunparse(parsed)
121+
106122
# iterate over the valid distribution urls and return the first
107123
# one that is matching.
108124
for dist_url in valid_distribution_urls:
@@ -116,12 +132,12 @@ async def get_pypi_data_from_purl(
116132
primary_language="Python",
117133
description=get_description(info),
118134
homepage_url=homepage_url,
119-
api_data_url=api_url,
135+
api_data_url=remove_credentials_from_url(api_url),
120136
bug_tracking_url=bug_tracking_url,
121137
code_view_url=code_view_url,
122138
license_expression=info.get("license_expression"),
123139
declared_license=get_declared_license(info),
124-
download_url=dist_url,
140+
download_url=remove_credentials_from_url(dist_url),
125141
size=url_data.get("size"),
126142
md5=digests.get("md5") or url_data.get("md5_digest"),
127143
sha256=digests.get("sha256"),

0 commit comments

Comments
 (0)