Skip to content

Commit be96d52

Browse files
authored
fix: package name normalization (#652)
Use package name normalization everywhere; see https://packaging.python.org/en/latest/specifications/name-normalization/ This should fix issues with mismatched or wrongly/alternatively written (but compatible) package names. --------- Signed-off-by: Jan Kowalleck <jan.kowalleck@gmail.com>
1 parent eb21b8b commit be96d52

File tree

63 files changed

+8356
-24
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

63 files changed

+8356
-24
lines changed

cyclonedx_py/_internal/environment.py

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
from . import BomBuilder, PropertyName
3535
from .cli_common import add_argument_mc_type, add_argument_pyproject
3636
from .utils.cdx import licenses_fixup, make_bom
37-
from .utils.packaging import metadata2extrefs, metadata2licenses
37+
from .utils.packaging import metadata2extrefs, metadata2licenses, normalize_packagename
3838
from .utils.pep610 import PackageSourceArchive, PackageSourceVcs, packagesource2extref, packagesource4dist
3939
from .utils.pyproject import pyproject2component, pyproject2dependencies, pyproject_load
4040

@@ -129,7 +129,7 @@ def __call__(self, *, # type:ignore[override]
129129
pyproject = pyproject_load(pyproject_file)
130130
root_c = pyproject2component(pyproject, type=mc_type)
131131
root_c.bom_ref.value = 'root-component'
132-
root_d = pyproject2dependencies(pyproject)
132+
root_d = tuple(pyproject2dependencies(pyproject))
133133
rc = (root_c, root_d)
134134

135135
path: List[str]
@@ -166,20 +166,23 @@ def __add_components(self, bom: 'Bom',
166166
)
167167
del dist_meta, dist_name, dist_version
168168
self.__component_add_extred_and_purl(component, packagesource4dist(dist))
169-
all_components[component.name.lower()] = component, map(Requirement, dist.requires or ())
169+
all_components[normalize_packagename(component.name)] = (
170+
component,
171+
tuple(map(Requirement, dist.requires or ()))
172+
)
170173

171174
self._logger.info('add component for package %r', component.name)
172175
self._logger.debug('add component: %r', component)
173176
bom.components.add(component)
174177

175178
if rc is not None:
176179
root_c = rc[0]
177-
root_c_lcname = root_c.name.lower()
178-
root_c_existed = all_components.get(root_c_lcname)
180+
root_c_nname = normalize_packagename(root_c.name)
181+
root_c_existed = all_components.get(root_c_nname)
179182
if root_c_existed is not None:
180183
bom.components.remove(root_c_existed[0])
181184
del root_c_existed
182-
all_components[root_c_lcname] = rc
185+
all_components[root_c_nname] = rc
183186
bom.metadata.component = root_c
184187
self._logger.debug('root-component: %r', root_c)
185188

@@ -189,7 +192,7 @@ def __finalize_dependencies(self, bom: 'Bom', all_components: 'T_AllComponents')
189192
for component, requires in all_components.values():
190193
component_deps: List[Component] = []
191194
for req in requires:
192-
req_component: Optional[Component] = all_components.get(req.name.lower(), (None,))[0]
195+
req_component: Optional[Component] = all_components.get(normalize_packagename(req.name), (None,))[0]
193196
if req_component is None:
194197
continue
195198
if req_component is component:

cyclonedx_py/_internal/pipenv.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
from .cli_common import add_argument_mc_type, add_argument_pyproject
3333
from .utils.args import arparse_split
3434
from .utils.cdx import make_bom
35+
from .utils.packaging import normalize_packagename
3536
from .utils.pyproject import pyproject_file2component
3637
from .utils.secret import redact_auth_from_url
3738

@@ -151,16 +152,17 @@ def _make_bom(self, root_c: Optional['Component'],
151152

152153
all_components: Dict[str, Component] = {}
153154
if root_c:
154-
# root for self-installs
155-
all_components[root_c.name] = root_c
155+
# root for possible self-installs
156+
all_components[normalize_packagename(root_c.name)] = root_c
156157
for group_name in use_groups:
157158
self._logger.debug('processing group %r ...', group_name)
158159
for package_name, package_data in locker.get(group_name, {}).items():
159-
if package_name in all_components:
160-
component = all_components[package_name]
160+
package_name_normalized = normalize_packagename(package_name)
161+
if package_name_normalized in all_components:
162+
component = all_components[package_name_normalized]
161163
self._logger.info('existing component for package %r', package_name)
162164
else:
163-
component = all_components[package_name] = Component(
165+
component = all_components[package_name_normalized] = Component(
164166
bom_ref=f'{package_name}{package_data.get("version", "")}',
165167
type=ComponentType.LIBRARY,
166168
name=package_name,

cyclonedx_py/_internal/poetry.py

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
from . import BomBuilder, PropertyName
3333
from .cli_common import add_argument_mc_type
3434
from .utils.cdx import make_bom
35+
from .utils.packaging import normalize_packagename
3536
from .utils.poetry import poetry2component
3637
from .utils.secret import redact_auth_from_url
3738
from .utils.toml import toml_loads
@@ -229,9 +230,9 @@ def _make_bom(self, project: 'NameDict', locker: 'NameDict',
229230
) for extra in use_extras)
230231
self._logger.debug('root-component: %r', root_c)
231232

232-
lock_data: Dict[str, _LockEntry] = {le.name.lower(): le for le in self._parse_lock(locker)}
233+
lock_data: Dict[str, _LockEntry] = {normalize_packagename(le.name): le for le in self._parse_lock(locker)}
233234

234-
lock_data[root_c.name] = _LockEntry( # needed for circle dependencies
235+
lock_data[normalize_packagename(root_c.name)] = _LockEntry( # needed for circle dependencies
235236
name=root_c.name,
236237
component=root_c,
237238
dependencies=set(),
@@ -243,15 +244,10 @@ def _make_bom(self, project: 'NameDict', locker: 'NameDict',
243244

244245
_dep_pattern = re_compile(r'^(?P<name>[^\[]+)(?:\[(?P<extras>.*)\])?$')
245246

246-
lock_version = self._get_lockfile_version(locker)
247-
should_tidy_lock_names = lock_version >= (2,)
248-
249247
def _add_ld(name: str, extras: Set[str]) -> Optional['Component']:
250-
name = name.lower()
248+
name = normalize_packagename(name)
251249
if name == 'python':
252250
return None
253-
if should_tidy_lock_names:
254-
name = name.replace('.', '-')
255251
le = lock_data.get(name)
256252
if le is None:
257253
self._logger.warning('skip unlocked component: %s', name)
@@ -292,9 +288,7 @@ def _add_ld(name: str, extras: Set[str]) -> Optional['Component']:
292288
for group_name in use_groups:
293289
self._logger.debug('processing group %r ...', group_name)
294290
for dep_name, dep_spec in po_cfg['group'][group_name].get('dependencies', {}).items():
295-
dep_name = dep_name.lower()
296-
if should_tidy_lock_names:
297-
dep_name = dep_name.replace('.', '-')
291+
dep_name = normalize_packagename(dep_name)
298292
self._logger.debug('root-component depends on %s', dep_name)
299293
if dep_name == 'python':
300294
continue
@@ -389,7 +383,7 @@ def __purl_qualifiers4lock(self, package: 'NameDict') -> 'NameDict':
389383
# > For version-controlled files, the VCS location syntax is similar to a URL and has the:
390384
# > `<vcs_tool>+<transport>://<host_name>[/<path_to_repository>][@<revision_tag_or_branch>][#<sub_path>]`
391385
qs['vcs_url'] = f'{source["type"]}+{redact_auth_from_url(source["url"])}@' + \
392-
source.get('resolved_reference', source.get('reference', ''))
386+
source.get('resolved_reference', source.get('reference', ''))
393387
elif source_type == 'url':
394388
if '://files.pythonhosted.org/' not in source['url']:
395389
# skip PURL bloat, do not add implicit information

cyclonedx_py/_internal/utils/packaging.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
# SPDX-License-Identifier: Apache-2.0
1616
# Copyright (c) OWASP Foundation. All Rights Reserved.
1717

18+
from re import compile as re_compile
1819
from typing import TYPE_CHECKING, Generator, List
1920

2021
from cyclonedx.exception.model import InvalidUriException
@@ -71,3 +72,12 @@ def metadata2extrefs(metadata: 'PackageMetadata') -> Generator['ExternalReferenc
7172
url=XsUri(url.strip()))
7273
except InvalidUriException: # pragma: nocover
7374
pass
75+
76+
77+
# see https://packaging.python.org/en/latest/specifications/name-normalization/#name-normalization
_NORMALIZE_PN_MATCHER = re_compile(r'[-_.]+')
_NORMALIZE_PN_REPLACE = '-'


def normalize_packagename(name: str) -> str:
    """Return the normalized form of a package name.

    Every run of ``-``, ``_`` and ``.`` is collapsed into a single ``-``,
    then the result is lowercased. Names that differ only in these
    respects therefore map to the same string.

    :param name: the package name as written by the user or a tool.
    :return: the normalized package name.
    """
    collapsed = _NORMALIZE_PN_MATCHER.sub(_NORMALIZE_PN_REPLACE, name)
    return collapsed.lower()
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
"""
2+
initialize this testbed.
3+
"""
4+
5+
from os import name as os_name
6+
from os.path import dirname, join
7+
from subprocess import PIPE, CompletedProcess, run # nosec:B404
8+
from sys import argv, executable
9+
from typing import Any
10+
from venv import EnvBuilder
11+
12+
__all__ = ['main']
13+
14+
# directory containing this script; also used as working directory for pip calls
this_dir = dirname(__file__)
15+
# virtual environment of this testbed: created by `main()`, targeted by pip via `--python`
env_dir = join(this_dir, '.venv')
16+
# pip constraints file: passed to `pip install -c`, rewritten when run with `--pin`
constraint_file = join(this_dir, 'pinning.txt')
17+
18+
19+
def pip_run(*args: str, **kwargs: Any) -> CompletedProcess:
    """Run pip as a subprocess against this testbed's environment.

    pip is not API, but a CLI -- so it is called like one, via ``-m pip``.
    The command is printed before it is executed.

    :param args: arguments appended to the pip command line.
    :param kwargs: extra keyword arguments handed to ``subprocess.run``.
    :return: the completed process.
    :raises RuntimeError: if the process exits with a non-zero return code.
    """
    command = (executable, '-m', 'pip', '--python', env_dir) + args
    print('+ ', *command)
    completed = run(command, **kwargs, cwd=this_dir, shell=False)  # nosec:B603
    if completed.returncode == 0:
        return completed
    raise RuntimeError('process failed')
31+
32+
33+
def pip_install(*args: str) -> None:
    """Run ``pip install`` with this testbed's fixed options.

    :param args: additional arguments for ``pip install``, e.g. requirements.
    """
    fixed_options = (
        '--require-virtualenv', '--no-input', '--progress-bar=off', '--no-color',
        '-c', constraint_file,  # needed for reproducibility
    )
    pip_run('install', *fixed_options, *args)
39+
40+
41+
def main() -> None:
    """Create this testbed's virtual environment and install its packages."""
    use_symlinks = os_name != 'nt'
    builder = EnvBuilder(
        system_site_packages=False,
        symlinks=use_symlinks,
        with_pip=False,
    )
    builder.create(env_dir)

    # https://packaging.python.org/en/latest/specifications/name-normalization/#name-normalization
    requirement = 'ruamel-YAML[jinja2]'  # actually "ruamel.yaml", normalizes to "ruamel-yaml"
    pip_install(requirement)
52+
53+
54+
if __name__ == '__main__':
    # set up the environment first; pinning needs the installed packages
    main()
    if '--pin' in argv:
        # freeze what got installed and store it as the constraints for later runs
        res = pip_run('freeze', '--all', '--local', stdout=PIPE)
        with open(constraint_file, 'wb') as cf:
            cf.write(res.stdout)
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
ruamel.yaml==0.18.5
2+
ruamel.yaml.clib==0.2.8
3+
ruamel.yaml.jinja2==0.2.7
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
[project]
2+
# https://packaging.python.org/en/latest/specifications/declaring-project-metadata/#declaring-project-metadata
3+
name = "normalize-packagename"
4+
version = "0.1.0"
5+
description = "packages with non-normalized names"
6+
7+
# see https://packaging.python.org/en/latest/specifications/name-normalization/#name-normalization
8+
9+
dependencies = [
10+
"ruamel-YAML[jinja2]" # actually "ruamel.yaml", normalizes to "ruamel-yaml"
11+
]
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
[pipenv]
2+
sort_pipfile = true
3+
4+
[packages]
5+
"ruamel-YAML[jinja2]" = "*" # actually "ruamel.yaml", normalizes to "ruamel-yaml"
6+
7+
[dev-packages]

tests/_data/infiles/pipenv/normalize-packagename/Pipfile.lock

Lines changed: 93 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
[project]
2+
# https://packaging.python.org/en/latest/specifications/declaring-project-metadata/#declaring-project-metadata
3+
name = "normalize-packagename"
4+
version = "0.1.0"
5+
description = "packages with non-normalized names"
6+
7+
# see https://packaging.python.org/en/latest/specifications/name-normalization/#name-normalization
8+
9+
dependencies = [
10+
"ruamel-YAML[jinja2]" # actually "ruamel.yaml", normalizes to "ruamel-yaml"
11+
]
12+

0 commit comments

Comments
 (0)