Skip to content

Commit 0d6215c

Browse files
committed
fix for #931
Signed-off-by: Jan Kowalleck <[email protected]>
1 parent 65eeba1 commit 0d6215c

15 files changed

+526
-6729
lines changed

cyclonedx_py/_internal/environment.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,7 @@ def __add_components(self, bom: 'Bom',
190190
component.licenses.update(metadata2licenses(dist_meta, lfac,
191191
gather_texts=self._gather_license_texts))
192192
if self._gather_license_texts:
193-
component.licenses.update(pep639_dist2licenses_from_files(dist, lfac, logger=self._logger))
193+
component.licenses.update(pep639_dist2licenses_from_files(dist, logger=self._logger))
194194
licenses_fixup(component)
195195
del lfac
196196
# endregion licenses

cyclonedx_py/_internal/utils/pep621.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,10 +71,10 @@ def project2licenses(project: dict[str, Any], lfac: 'LicenseFactory',
7171
# > These keys are mutually exclusive, so a tool MUST raise an error if the metadata specifies both keys.
7272
raise ValueError('`license.file` and `license.text` are mutually exclusive,')
7373
if gather_text and 'file' in plicense:
74-
# per spec:
74+
# Per PEP 621 spec:
7575
# > [...] a string value that is a relative file path [...].
7676
# > Tools MUST assume the file’s encoding is UTF-8.
77-
# anyway, we don't trust this and assume binary
77+
# But in reality, we found non-printable bytes in some files!
7878
with open(join(dirname(fpath), *PurePosixPath(plicense['file']).parts), 'rb') as plicense_fileh:
7979
yield DisjunctiveLicense(name=f"declared license of '{project['name']}'",
8080
acknowledgement=lack,

cyclonedx_py/_internal/utils/pep639.py

Lines changed: 28 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -25,13 +25,12 @@
2525
from collections.abc import Generator
2626
from os.path import dirname, join
2727
from pathlib import Path, PurePosixPath
28-
from typing import TYPE_CHECKING, Any, AnyStr
28+
from typing import TYPE_CHECKING, Any, Union
2929

3030
from cyclonedx.model import AttachedText, Encoding
3131
from cyclonedx.model.license import DisjunctiveLicense, LicenseAcknowledgement
3232

3333
from ..py_interop.glob import glob
34-
from .bytes import bytes2str
3534
from .mimetypes import guess_type
3635

3736
if TYPE_CHECKING: # pragma: no cover
@@ -74,61 +73,51 @@ def project2licenses(project: dict[str, Any], lfac: 'LicenseFactory',
7473
return None
7574

7675

77-
# per spec > license files are stored in the `.dist-info/licenses/` subdirectory of the produced wheel.
78-
# but in practice, other locations are used, too.
79-
_LICENSE_LOCATIONS = ('licenses', 'license_files', '')
76+
# Per PEP 639 spec, license files are stored in the `.dist-info/` directory of the produced wheel.
77+
# see https://peps.python.org/pep-0639/#add-license-file-field
78+
# But in reality, other locations are used, too...
79+
_LICENSE_LOCATIONS = ('', 'licenses', 'license_files')
8080

8181

8282
def dist2licenses_from_files(
83-
dist: 'Distribution', lfac: 'LicenseFactory',
83+
dist: 'Distribution',
8484
logger: 'Logger'
8585
) -> Generator['License', None, None]:
8686
lack = LicenseAcknowledgement.DECLARED
8787
metadata = dist.metadata # see https://packaging.python.org/en/latest/specifications/core-metadata/
8888
for mlfile in set(metadata.get_all('License-File', ())):
8989
# see spec: https://peps.python.org/pep-0639/#add-license-file-field
9090
# latest spec rev: https://discuss.python.org/t/pep-639-round-3-improving-license-clarity-with-better-package-metadata/53020 # noqa: E501
91-
content = None
91+
content: Union[None, str, bytes] = None
9292
for mlpath in _LICENSE_LOCATIONS:
9393
try:
9494
content = dist.read_text(join(mlpath, mlfile))
95+
break # for-loop
9596
except UnicodeDecodeError as err:
96-
try:
97-
content = bytes2str(err.object)
98-
except UnicodeDecodeError:
99-
pass
100-
else:
101-
break # for-loop
102-
else:
103-
if content is not None:
104-
break # for-loop
97+
content = err.object
98+
break # for-loop
99+
except Exception:
100+
continue # for-loop
105101
if content is None: # pragma: no cover
106102
logger.debug('Error: failed to read license file %r for dist %r',
107103
mlfile, metadata['Name'])
108-
continue
104+
continue # for-loop
109105
yield _make_license_from_content(mlfile, content, lack)
110106

111107

112-
def _make_license_from_content(file_name: str, content: AnyStr, lack: 'LicenseAcknowledgement') -> DisjunctiveLicense:
113-
encoding = None
108+
def _make_license_from_content(file_name: str, content: Union[str, bytes],
109+
lack: 'LicenseAcknowledgement') -> DisjunctiveLicense:
114110
content_type = guess_type(file_name) or AttachedText.DEFAULT_CONTENT_TYPE
115-
# per default, license files are human-readable texts.
116-
if content_type.startswith('text/'):
117-
content_s = bytes2str(content) \
118-
if isinstance(content, bytes) \
119-
else content
120-
else:
121-
encoding = Encoding.BASE_64
122-
content_s = b64encode(
123-
content
124-
if isinstance(content, bytes)
125-
else content.encode('utf-8')
126-
).decode('ascii')
127-
return DisjunctiveLicense(
128-
name=f'{lack.value} license file: {"/".join(Path(file_name).parts)}',
129-
acknowledgement=lack,
130-
text=AttachedText(
131-
content=content_s,
132-
encoding=encoding,
133-
content_type=content_type
134-
))
111+
# Per PEP 639 spec, license files are human-readable texts.
112+
# But in reality, we found non-printable bytes in some files!
113+
encoding = Encoding.BASE_64
114+
content_s = b64encode(
115+
content
116+
if isinstance(content, bytes)
117+
else content.encode('utf-8')
118+
).decode('ascii')
119+
return DisjunctiveLicense(name=f'{lack.value} license file: {"/".join(Path(file_name).parts)}',
120+
acknowledgement=lack,
121+
text=AttachedText(content=content_s,
122+
encoding=encoding,
123+
content_type=content_type))

tests/_data/infiles/environment/with-license-pep639/init.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ def main() -> None:
7171
'jsonpointer',
7272
'license_expression',
7373
'lxml',
74-
'chardet==5.2.0' , # https://github.com/CycloneDX/cyclonedx-python/issues/931
74+
'chardet==5.2.0', # https://github.com/CycloneDX/cyclonedx-python/issues/931
7575
# with expression-like License AND License-File
7676
'cryptography==43.0.1', # https://github.com/CycloneDX/cyclonedx-python/issues/826
7777
# with possibly unexpected license files

0 commit comments

Comments
 (0)