|
25 | 25 | from collections.abc import Generator |
26 | 26 | from os.path import dirname, join |
27 | 27 | from pathlib import Path, PurePosixPath |
28 | | -from typing import TYPE_CHECKING, Any, AnyStr |
| 28 | +from typing import TYPE_CHECKING, Any, Union |
29 | 29 |
|
30 | 30 | from cyclonedx.model import AttachedText, Encoding |
31 | 31 | from cyclonedx.model.license import DisjunctiveLicense, LicenseAcknowledgement |
32 | 32 |
|
33 | 33 | from ..py_interop.glob import glob |
34 | | -from .bytes import bytes2str |
35 | 34 | from .mimetypes import guess_type |
36 | 35 |
|
37 | 36 | if TYPE_CHECKING: # pragma: no cover |
@@ -74,61 +73,51 @@ def project2licenses(project: dict[str, Any], lfac: 'LicenseFactory', |
74 | 73 | return None |
75 | 74 |
|
76 | 75 |
|
77 | | -# per spec > license files are stored in the `.dist-info/licenses/` subdirectory of the produced wheel. |
78 | | -# but in practice, other locations are used, too. |
79 | | -_LICENSE_LOCATIONS = ('licenses', 'license_files', '') |
| 76 | +# Per PEP 639 spec, license files are stored in the `.dist-info/` directory of the produced wheel. |
| 77 | +# see https://peps.python.org/pep-0639/#add-license-file-field |
| 78 | +# But in reality, other locations are used, too... |
| 79 | +_LICENSE_LOCATIONS = ('', 'licenses', 'license_files') |
80 | 80 |
|
81 | 81 |
|
82 | 82 | def dist2licenses_from_files( |
83 | | - dist: 'Distribution', lfac: 'LicenseFactory', |
| 83 | + dist: 'Distribution', |
84 | 84 | logger: 'Logger' |
85 | 85 | ) -> Generator['License', None, None]: |
86 | 86 | lack = LicenseAcknowledgement.DECLARED |
87 | 87 | metadata = dist.metadata # see https://packaging.python.org/en/latest/specifications/core-metadata/ |
88 | 88 | for mlfile in set(metadata.get_all('License-File', ())): |
89 | 89 | # see spec: https://peps.python.org/pep-0639/#add-license-file-field |
90 | 90 | # latest spec rev: https://discuss.python.org/t/pep-639-round-3-improving-license-clarity-with-better-package-metadata/53020 # noqa: E501 |
91 | | - content = None |
| 91 | + content: Union[None, str, bytes] = None |
92 | 92 | for mlpath in _LICENSE_LOCATIONS: |
93 | 93 | try: |
94 | 94 | content = dist.read_text(join(mlpath, mlfile)) |
| 95 | + break # for-loop |
95 | 96 | except UnicodeDecodeError as err: |
96 | | - try: |
97 | | - content = bytes2str(err.object) |
98 | | - except UnicodeDecodeError: |
99 | | - pass |
100 | | - else: |
101 | | - break # for-loop |
102 | | - else: |
103 | | - if content is not None: |
104 | | - break # for-loop |
| 97 | + content = err.object |
| 98 | + break # for-loop |
| 99 | + except Exception: |
| 100 | + continue # for-loop |
105 | 101 | if content is None: # pragma: no cover |
106 | 102 | logger.debug('Error: failed to read license file %r for dist %r', |
107 | 103 | mlfile, metadata['Name']) |
108 | | - continue |
| 104 | + continue # for-loop |
109 | 105 | yield _make_license_from_content(mlfile, content, lack) |
110 | 106 |
|
111 | 107 |
|
112 | | -def _make_license_from_content(file_name: str, content: AnyStr, lack: 'LicenseAcknowledgement') -> DisjunctiveLicense: |
113 | | - encoding = None |
| 108 | +def _make_license_from_content(file_name: str, content: Union[str, bytes], |
| 109 | + lack: 'LicenseAcknowledgement') -> DisjunctiveLicense: |
114 | 110 | content_type = guess_type(file_name) or AttachedText.DEFAULT_CONTENT_TYPE |
115 | | - # per default, license files are human-readable texts. |
116 | | - if content_type.startswith('text/'): |
117 | | - content_s = bytes2str(content) \ |
118 | | - if isinstance(content, bytes) \ |
119 | | - else content |
120 | | - else: |
121 | | - encoding = Encoding.BASE_64 |
122 | | - content_s = b64encode( |
123 | | - content |
124 | | - if isinstance(content, bytes) |
125 | | - else content.encode('utf-8') |
126 | | - ).decode('ascii') |
127 | | - return DisjunctiveLicense( |
128 | | - name=f'{lack.value} license file: {"/".join(Path(file_name).parts)}', |
129 | | - acknowledgement=lack, |
130 | | - text=AttachedText( |
131 | | - content=content_s, |
132 | | - encoding=encoding, |
133 | | - content_type=content_type |
134 | | - )) |
| 111 | + # Per PEP 639 spec, license files are human-readable texts. |
| 112 | + # But in reality, we found non-printable bytes in some files! |
| 113 | + encoding = Encoding.BASE_64 |
| 114 | + content_s = b64encode( |
| 115 | + content |
| 116 | + if isinstance(content, bytes) |
| 117 | + else content.encode('utf-8') |
| 118 | + ).decode('ascii') |
| 119 | + return DisjunctiveLicense(name=f'{lack.value} license file: {"/".join(Path(file_name).parts)}', |
| 120 | + acknowledgement=lack, |
| 121 | + text=AttachedText(content=content_s, |
| 122 | + encoding=encoding, |
| 123 | + content_type=content_type)) |
0 commit comments