Skip to content

Commit a2e3322

Browse files
authored
feat: strip authentication secrets from download/registry urls (#647)
Strip secret authentication information from registry and download URLs, in accordance with PEP 610:

> `url` MUST be stripped of any sensitive authentication information, for security reasons.
> [...]
> Additionally, the user:password section of the URL MAY be a well-known, non security sensitive string.

---------

Signed-off-by: Jan Kowalleck <jan.kowalleck@gmail.com>
1 parent bd401d6 commit a2e3322

File tree

113 files changed

+5943
-8399
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

113 files changed

+5943
-8399
lines changed

cyclonedx_py/_internal/pipenv.py

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
from .utils.args import arparse_split
3434
from .utils.cdx import make_bom
3535
from .utils.pyproject import pyproject_file2component
36+
from .utils.secret import redact_auth_from_url
3637

3738
if TYPE_CHECKING: # pragma: no cover
3839
from logging import Logger
@@ -141,9 +142,12 @@ def _make_bom(self, root_c: Optional['Component'],
141142
self._logger.debug('root-component: %r', root_c)
142143

143144
meta: NameDict = locker[self.__LOCKFILE_META]
144-
source_urls: Dict[str, str] = {source['name']: source['url'].rstrip('/') for source in meta.get('sources', ())}
145+
source_urls: Dict[str, str] = {
146+
source['name']: redact_auth_from_url(source['url']).rstrip('/')
147+
for source in meta.get('sources', ())
148+
}
145149
if self._pypi_url is not None:
146-
source_urls['pypi'] = self._pypi_url.rstrip('/')
150+
source_urls['pypi'] = redact_auth_from_url(self._pypi_url).rstrip('/')
147151

148152
all_components: Dict[str, Component] = {}
149153
if root_c:
@@ -223,7 +227,7 @@ def __make_extrefs(self, name: str, data: 'NameDict', source_urls: Dict[str, str
223227
vcs_source = self.__package_vcs(data)
224228
try:
225229
if vcs_source is not None:
226-
vcs_source_url = vcs_source[1]
230+
vcs_source_url = redact_auth_from_url(vcs_source[1])
227231
yield ExternalReference(
228232
comment=f'from {vcs_source[0]}',
229233
type=ExternalReferenceType.VCS,
@@ -232,7 +236,7 @@ def __make_extrefs(self, name: str, data: 'NameDict', source_urls: Dict[str, str
232236
yield ExternalReference(
233237
comment='from file',
234238
type=ExternalReferenceType.DISTRIBUTION,
235-
url=XsUri(data['file']),
239+
url=XsUri(redact_auth_from_url(data['file'])),
236240
hashes=hashes)
237241
elif 'path' in data:
238242
yield ExternalReference(
@@ -263,16 +267,16 @@ def __purl_qualifiers4lock(self, data: 'NameDict', sourcees: Dict[str, str]) ->
263267
# see section 3.7.4 in https://github.com/spdx/spdx-spec/blob/cfa1b9d08903/chapters/3-package-information.md
264268
# > For version-controlled files, the VCS location syntax is similar to a URL and has the:
265269
# > `<vcs_tool>+<transport>://<host_name>[/<path_to_repository>][@<revision_tag_or_branch>][#<sub_path>]`
266-
qs['vcs_url'] = f'{vcs_source[1]}@{data["ref"]}'
270+
qs['vcs_url'] = f'{redact_auth_from_url(vcs_source[1])}@{data["ref"]}'
267271
elif 'file' in data:
268272
if '://files.pythonhosted.org/' not in data['file']:
269273
# skip PURL bloat, do not add implicit information
270-
qs['download_url'] = data['file']
274+
qs['download_url'] = redact_auth_from_url(data['file'])
271275
elif 'index' in data:
272276
source_url = sourcees.get(data['index'], 'https://pypi.org/simple')
273277
if '://pypi.org/' not in source_url:
274278
# skip PURL bloat, do not add implicit information
275-
qs['repository_url'] = source_url
279+
qs['repository_url'] = redact_auth_from_url(source_url.rstrip('/'))
276280
return qs
277281

278282
def __make_dependency_graph(self) -> None:

cyclonedx_py/_internal/poetry.py

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
from .cli_common import add_argument_mc_type
3434
from .utils.cdx import make_bom
3535
from .utils.poetry import poetry2component
36+
from .utils.secret import redact_auth_from_url
3637
from .utils.toml import toml_loads
3738

3839
if TYPE_CHECKING: # pragma: no cover
@@ -387,17 +388,17 @@ def __purl_qualifiers4lock(self, package: 'NameDict') -> 'NameDict':
387388
# see section 3.7.4 in https://github.com/spdx/spdx-spec/blob/cfa1b9d08903/chapters/3-package-information.md
388389
# > For version-controlled files, the VCS location syntax is similar to a URL and has the:
389390
# > `<vcs_tool>+<transport>://<host_name>[/<path_to_repository>][@<revision_tag_or_branch>][#<sub_path>]`
390-
qs['vcs_url'] = f'{source["type"]}+{source["url"]}@' + \
391+
qs['vcs_url'] = f'{source["type"]}+{redact_auth_from_url(source["url"])}@' + \
391392
source.get('resolved_reference', source.get('reference', ''))
392393
elif source_type == 'url':
393394
if '://files.pythonhosted.org/' not in source['url']:
394395
# skip PURL bloat, do not add implicit information
395-
qs['download_url'] = source['url']
396+
qs['download_url'] = redact_auth_from_url(source['url'])
396397
elif source_type == 'legacy':
397398
source_url = package['source'].get('url', 'https://pypi.org/simple')
398399
if '://pypi.org/' not in source_url:
399400
# skip PURL bloat, do not add implicit information
400-
qs['repository_url'] = source_url
401+
qs['repository_url'] = redact_auth_from_url(source_url)
401402

402403
return qs
403404

@@ -415,7 +416,7 @@ def __extrefs4lock(self, package: 'NameDict') -> Generator['ExternalReference',
415416
yield from self.__extrefs4lock_vcs(package)
416417

417418
def __extrefs4lock_legacy(self, package: 'NameDict') -> Generator['ExternalReference', None, None]:
418-
source_url = package['source'].get('url', 'https://pypi.org/simple')
419+
source_url = redact_auth_from_url(package['source'].get('url', 'https://pypi.org/simple'))
419420
for file in package['files']:
420421
try:
421422
yield ExternalReference(
@@ -433,7 +434,7 @@ def __extrefs4lock_url(self, package: 'NameDict') -> Generator['ExternalReferenc
433434
yield ExternalReference(
434435
comment='from url',
435436
type=ExternalReferenceType.DISTRIBUTION,
436-
url=XsUri(package['source']['url']),
437+
url=XsUri(redact_auth_from_url(package['source']['url'])),
437438
hashes=[HashType.from_composite_str(package['files'][0]['hash'])] if len(package['files']) else None
438439
)
439440
except (InvalidUriException, UnknownHashTypeException) as error: # pragma: nocover
@@ -444,7 +445,7 @@ def __extrefs4lock_file(self, package: 'NameDict') -> Generator['ExternalReferen
444445
yield ExternalReference(
445446
comment='from file',
446447
type=ExternalReferenceType.DISTRIBUTION,
447-
url=XsUri(package['source']['url']),
448+
url=XsUri(redact_auth_from_url(package['source']['url'])),
448449
hashes=[HashType.from_composite_str(package['files'][0]['hash'])] if len(package['files']) else None
449450
)
450451
except (InvalidUriException, UnknownHashTypeException) as error: # pragma: nocover
@@ -455,7 +456,7 @@ def __extrefs4lock_directory(self, package: 'NameDict') -> Generator['ExternalRe
455456
yield ExternalReference(
456457
comment='from directory',
457458
type=ExternalReferenceType.DISTRIBUTION,
458-
url=XsUri(package['source']['url'])
459+
url=XsUri(redact_auth_from_url(package['source']['url']))
459460
# no hash for a source-directory
460461
)
461462
except InvalidUriException as error: # pragma: nocover
@@ -468,7 +469,7 @@ def __extrefs4lock_vcs(self, package: 'NameDict') -> Generator['ExternalReferenc
468469
yield ExternalReference(
469470
comment='from VCS',
470471
type=ExternalReferenceType.VCS,
471-
url=XsUri(f'{source["type"]}+{source["url"]}#{vcs_ref}')
472+
url=XsUri(f'{source["type"]}+{redact_auth_from_url(source["url"])}#{vcs_ref}')
472473
# no hashes, has source.resolved_reference instead, which is a property
473474
)
474475
except InvalidUriException as error: # pragma: nocover

cyclonedx_py/_internal/requirements.py

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,10 @@
1818

1919
from argparse import OPTIONAL, ArgumentParser
2020
from functools import reduce
21+
from itertools import chain
2122
from os import unlink
2223
from textwrap import dedent
23-
from typing import TYPE_CHECKING, Any, Generator, List, Optional, Set
24+
from typing import TYPE_CHECKING, Any, Generator, Iterable, Optional, Set
2425

2526
from cyclonedx.exception.model import InvalidUriException, UnknownHashTypeException
2627
from cyclonedx.model import ExternalReference, ExternalReferenceType, HashType, Property, XsUri
@@ -33,6 +34,7 @@
3334
from .utils.cdx import make_bom
3435
from .utils.io import io2file
3536
from .utils.pyproject import pyproject_file2component
37+
from .utils.secret import redact_auth_from_url
3638

3739
if TYPE_CHECKING: # pragma: no cover
3840
from logging import Logger
@@ -97,11 +99,11 @@ def make_argument_parser(**kwargs: Any) -> 'ArgumentParser':
9799
def __init__(self, *,
98100
logger: 'Logger',
99101
index_url: str,
100-
extra_index_urls: List[str],
102+
extra_index_urls: Iterable[str],
101103
**__: Any) -> None:
102104
self._logger = logger
103105
self._index_url = index_url
104-
self._extra_index_urls = set(extra_index_urls)
106+
self._extra_index_urls = tuple(extra_index_urls)
105107

106108
def __call__(self, *, # type:ignore[override]
107109
requirements_file: str,
@@ -136,9 +138,15 @@ def _make_bom(self, root_c: Optional['Component'], rf: 'RequirementsFile') -> 'B
136138
return bom
137139

138140
def _add_components(self, bom: 'Bom', rf: 'RequirementsFile') -> None:
139-
index_url = reduce(lambda c, i: i.options.get('index_url') or c, rf.options, self._index_url)
140-
extra_index_urls = self._extra_index_urls.union(*(
141-
i.options['extra_index_urls'] for i in rf.options if 'extra_index_urls' in i.options))
141+
index_url = redact_auth_from_url(reduce(
142+
lambda c, i: i.options.get('index_url') or c, rf.options, self._index_url
143+
).rstrip('/'))
144+
extra_index_urls = set(map(
145+
lambda u: redact_auth_from_url(u.rstrip('/')),
146+
chain(self._extra_index_urls, chain.from_iterable(
147+
i.options['extra_index_urls'] for i in rf.options if 'extra_index_urls' in i.options
148+
))
149+
))
142150
self._logger.debug('index_url = %r', index_url)
143151
self._logger.debug('extra_index_urls = %r', extra_index_urls)
144152

@@ -178,11 +186,12 @@ def _make_component(self, req: 'InstallRequirement',
178186
elif req.is_url:
179187
if '://files.pythonhosted.org/' not in req.link.url:
180188
# skip PURL bloat, do not add implicit information
181-
purl_qualifiers['vcs_url' if req.is_vcs_url else 'download_url'] = req.link.url
189+
purl_qualifiers['vcs_url' if req.is_vcs_url else 'download_url'] = redact_auth_from_url(
190+
req.link.url)
182191
external_references.append(ExternalReference(
183192
comment='explicit dist url',
184193
type=ExternalReferenceType.VCS if req.is_vcs_url else ExternalReferenceType.DISTRIBUTION,
185-
url=XsUri(req.link.url),
194+
url=XsUri(redact_auth_from_url(req.link.url)),
186195
hashes=hashes))
187196
else:
188197
# url based on https://warehouse.pypa.io/api-reference/legacy.html
@@ -203,7 +212,7 @@ def _make_component(self, req: 'InstallRequirement',
203212

204213
return Component(
205214
bom_ref=f'requirements-L{req.line_number}',
206-
description=f'requirements line {req.line_number}: {req.line}',
215+
description=f'requirements line {req.line_number}: {redact_auth_from_url(req.line)}',
207216
type=ComponentType.LIBRARY,
208217
name=name or 'unknown',
209218
version=version,
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# This file is part of CycloneDX Python Lib
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
#
15+
# SPDX-License-Identifier: Apache-2.0
16+
# Copyright (c) OWASP Foundation. All Rights Reserved.
17+
18+
from re import compile as re_compile
19+
20+
# Matches the ``user:password@`` userinfo section that directly follows the ``://`` of a URL.
# - The user part may be empty (``://:token@host``), so password-only credentials are caught, too.
# - A user without a password (``ssh://git@host``) is intentionally NOT matched:
#   PEP 610 allows a well-known, non security sensitive string to stay in the URL.
# - ``/`` is excluded from both parts, so ``host:port/path@ref`` is never mistaken for credentials.
_URL_AUTH_MATCHER = re_compile(r'(?<=://)[^/@:]*:[^/@]+@')
_URL_AUTH_REPLACE = ''  # drop auth - in accordance with PEP 610


def redact_auth_from_url(s: str) -> str:
    """Return `s` with the ``user:password@`` section of every contained URL removed.

    Is intended to work on any string that contains a URL, not on bare URLs only.

    :param s: a string that may contain one or more URLs.
    :return: `s` without URL credentials; unchanged if there is nothing to redact.
    """
    if '@' not in s:
        # fast path: without an ``@`` there cannot be any userinfo section
        return s
    return _URL_AUTH_MATCHER.sub(_URL_AUTH_REPLACE, s)

tests/__init__.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,12 @@
1616
# Copyright (c) OWASP Foundation. All Rights Reserved.
1717

1818

19-
import re
2019
from json import dumps as json_dumps
2120
from os import getenv
2221
from os.path import dirname, join
2322
from pathlib import Path
23+
from re import sub as re_sub
24+
from sys import stderr
2425
from typing import Union
2526
from unittest import TestCase
2627
from xml.sax.saxutils import escape as xml_escape, quoteattr as xml_quoteattr # nosec:B406
@@ -31,11 +32,11 @@
3132

3233
RECREATE_SNAPSHOTS = '1' == getenv('CDX_TEST_RECREATE_SNAPSHOTS')
3334
if RECREATE_SNAPSHOTS:
34-
print('!!! WILL RECREATE ALL SNAPSHOTS !!!')
35+
print('!!! WILL RECREATE ALL SNAPSHOTS !!!', file=stderr)
3536

3637
INIT_TESTBEDS = '1' != getenv('CDX_TEST_SKIP_INIT_TESTBEDS')
3738
if INIT_TESTBEDS:
38-
print('!!! WILL INIT TESTBEDS !!!')
39+
print('!!! WILL INIT TESTBEDS !!!', file=stderr)
3940

4041
_TESTDATA_DIRECTORY = join(dirname(__file__), '_data')
4142

@@ -102,15 +103,15 @@ def make_xml_comparable(bom: str) -> str:
102103
' <vendor>CycloneDX</vendor>\n'
103104
' <name>cyclonedx-bom</name>\n'
104105
' <version>thisVersion-testing</version>')
105-
bom = re.sub( # replace metadata.tools.version
106+
bom = re_sub( # replace metadata.tools.version
106107
' <vendor>CycloneDX</vendor>\n'
107108
' <name>cyclonedx-python-lib</name>\n'
108109
' <version>.*?</version>',
109110
' <vendor>CycloneDX</vendor>\n'
110111
' <name>cyclonedx-python-lib</name>\n'
111112
' <version>libVersion-testing</version>',
112113
bom)
113-
bom = re.sub( # replace metadata.tools.externalReferences
114+
bom = re_sub( # replace metadata.tools.externalReferences
114115
' <vendor>CycloneDX</vendor>\n'
115116
' <name>cyclonedx-python-lib</name>\n'
116117
r' <version>(.*?)</version>\n'
@@ -132,15 +133,15 @@ def make_json_comparable(bom: str) -> str:
132133
' "name": "cyclonedx-bom",\n'
133134
' "vendor": "CycloneDX",\n'
134135
' "version": "thisVersion-testing"')
135-
bom = re.sub( # replace metadata.tools.version
136+
bom = re_sub( # replace metadata.tools.version
136137
' "name": "cyclonedx-python-lib",\n'
137138
' "vendor": "CycloneDX",\n'
138139
' "version": ".*?"',
139140
' "name": "cyclonedx-python-lib",\n'
140141
' "vendor": "CycloneDX",\n'
141142
' "version": "libVersion-testing"',
142143
bom)
143-
bom = re.sub( # replace metadata.tools.externalReferences
144+
bom = re_sub( # replace metadata.tools.externalReferences
144145
r' "externalReferences": \[[\s\S]*?\],\n'
145146
' "name": "cyclonedx-python-lib",\n'
146147
' "vendor": "CycloneDX"',

tests/_data/infiles/_helpers/pypi-proxy.py

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -22,17 +22,17 @@
2222
This might be needed to play certain setups.
2323
"""
2424

25-
import sys
2625
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
2726
from os import unlink
27+
from sys import argv, stderr
2828
from urllib.request import urlretrieve
2929

3030

3131
class PypiProxyReqHandler(BaseHTTPRequestHandler):
3232
def do_GET(self) -> None: # noqa:N802
33-
print('> ', self.path, file=sys.stderr)
33+
print('PyPI-PROXY > ', self.path, file=stderr)
3434
p, m = urlretrieve(f'https://pypi.org{self.path}') # nosec B310
35-
print('< ', p, file=sys.stderr)
35+
print('PyPI-PROXY < ', p, file=stderr)
3636
self.send_response(200)
3737
for k, v in m.items():
3838
self.send_header(k, v)
@@ -42,11 +42,15 @@ def do_GET(self) -> None: # noqa:N802
4242
unlink(p)
4343

4444

45+
def make_proxy(port: int) -> ThreadingHTTPServer:
    """Create (but do not start) the PyPI proxy server.

    :param port: TCP port to bind; the host part of the address is the empty string.
    :return: a threading HTTP server that dispatches requests to `PypiProxyReqHandler`.
    """
    return ThreadingHTTPServer(('', port), PypiProxyReqHandler)
48+
49+
4550
if __name__ == '__main__':
46-
server_address = ('', int(sys.argv[1]) if len(sys.argv) >= 2 else 8080)
47-
httpd = ThreadingHTTPServer(server_address, PypiProxyReqHandler)
48-
print(f'running PyPI proxy at: {server_address!r}', file=sys.stderr)
51+
proxy = make_proxy(int(argv[1]) if len(argv) >= 2 else 8080)
52+
print(f'running PyPI proxy at: {proxy.server_address!r}', file=stderr)
4953
try:
50-
httpd.serve_forever()
54+
proxy.serve_forever()
5155
except KeyboardInterrupt:
52-
httpd.server_close()
56+
proxy.server_close()

0 commit comments

Comments
 (0)