|
23 | 23 |
|
24 | 24 | from base64 import b64encode |
25 | 25 | from os.path import join |
26 | | -from typing import TYPE_CHECKING, Generator |
| 26 | +from typing import TYPE_CHECKING, Generator, Set, Union |
27 | 27 |
|
28 | 28 | from cyclonedx.factory.license import LicenseFactory |
29 | 29 | from cyclonedx.model import AttachedText, Encoding |
30 | 30 | from cyclonedx.model.license import DisjunctiveLicense, LicenseAcknowledgement |
31 | 31 |
|
| 32 | +from .io import io2str |
32 | 33 | from .mimetypes import guess_type |
33 | 34 |
|
34 | 35 | if TYPE_CHECKING: # pragma: no cover |
|
38 | 39 | from cyclonedx.model.license import License |
39 | 40 |
|
40 | 41 |
|
def handle_bad_license_file_encoding(
    dist: 'Distribution',
    lfile: str,
    logger: 'Logger'
) -> Union[str, None]:
    """Fallback reader for a license file that could not be decoded as text.

    The file is located on disk via ``dist.locate_file``, opened in binary
    mode and converted to a string with ``io2str``.

    :param dist: distribution that declared the license file.
    :param lfile: path of the license file, relative to the metadata dir.
    :param logger: receives a debug message when no candidate could be loaded.
    :return: the converted file content, or ``None`` if nothing could be read.
    """

    def try_load(metadir: str) -> Union[str, None]:
        # Might raise NotImplementedError in theory
        # but nothing we can do in that case.
        try:
            candidate = dist.locate_file(join(metadir, lfile))
        except NotImplementedError:
            return None

        if not candidate:
            return None

        try:
            with open(str(candidate), 'rb') as fin:
                return io2str(fin)
        except OSError:
            # Best effort: a missing file, a directory or an unreadable file
            # (FileNotFoundError, IsADirectoryError, PermissionError, ...)
            # all mean "no content here" instead of aborting the caller.
            return None

    # Distribution has no method to find the actual metadata dir,
    # e.g. dist-info or egg-info.
    # So we mimic the logic in PathDistribution and check both subdirs
    content: Union[str, None] = None
    for metadir in ('.dist-info', '.egg-info'):
        content = try_load(metadir)
        # Compare against None, not truthiness: an empty license file is a
        # successful read and must not be overwritten by the next attempt.
        if content is not None:
            break

    if content is None:
        logger.debug('Error: license file %r for dist %r is not UTF-8 encoded',
                     lfile, dist.metadata['Name'])
    return content
| 79 | + |
| 80 | + |
def gather_license_texts(
    dist: 'Distribution',
    lfiles: Set[str],
    logger: 'Logger'
) -> Generator['License', None, None]:
    """Yield one declared license, with attached text, per readable license file.

    :param dist: distribution whose license files are read.
    :param lfiles: declared license file names to look up.
    :param logger: receives a debug message for every file that cannot be read.
    :return: generator of ``DisjunctiveLicense`` objects carrying the file content;
        files that cannot be read are skipped.
    """
    acknowledgement = LicenseAcknowledgement.DECLARED
    # see spec: https://peps.python.org/pep-0639/#add-license-file-field
    # latest spec rev: https://discuss.python.org/t/pep-639-round-3-improving-license-clarity-with-better-package-metadata/53020  # noqa: E501
    #
    # per spec > license files are stored in the `.dist-info/licenses/` subdirectory of the produced wheel.
    # but in practice, other locations are used, too.
    # the candidate locations are probed in this order; the first one found wins.
    search_dirs = ('licenses', 'license_files', '.')

    for mlfile in lfiles:
        text = None
        undecodable = None
        for candidate in (join(subdir, mlfile) for subdir in search_dirs):
            try:
                text = dist.read_text(candidate)
            except UnicodeDecodeError:
                # Malformed, stop looking
                undecodable = candidate
                break
            if text is not None:
                break

        if text is None and undecodable:  # pragma: no cover
            # Try a little harder
            text = handle_bad_license_file_encoding(dist, undecodable, logger)

        if text is None:  # pragma: no cover
            logger.debug('Error: failed to read license file %r for dist %r',
                         mlfile, dist.metadata['Name'])
            continue

        content_type = guess_type(mlfile) or AttachedText.DEFAULT_CONTENT_TYPE
        # per default, license files are human-readable texts.
        if content_type.startswith('text/'):
            encoding = None
        else:
            # anything that is not text is attached base64-encoded
            encoding = Encoding.BASE_64
            text = b64encode(text.encode('utf-8')).decode('ascii')

        attachment = AttachedText(
            content=text,
            encoding=encoding,
            content_type=content_type
        )
        yield DisjunctiveLicense(
            name=f'declared license file: {mlfile}',
            acknowledgement=acknowledgement,
            text=attachment)
| 132 | + |
| 133 | + |
def dist2licenses(
    dist: 'Distribution',
    gather_text: bool,
    logger: 'Logger'
) -> Generator['License', None, None]:
    """Yield the licenses a distribution declares in its core metadata.

    :param dist: distribution to inspect.
    :param gather_text: when true, also yield the declared license files' texts.
    :param logger: passed on to the license-file reader.
    :return: generator over the license built from ``License-Expression`` (if set),
        followed by the licenses produced from the ``License-File`` entries (if requested).
    """
    metadata = dist.metadata  # see https://packaging.python.org/en/latest/specifications/core-metadata/

    expression = metadata['License-Expression']
    if expression is not None:
        # see spec: https://peps.python.org/pep-0639/#add-license-expression-field
        yield LicenseFactory().make_from_string(
            expression,
            license_acknowledgement=LicenseAcknowledgement.DECLARED)

    if not gather_text:
        return

    # de-duplicate the declared file names; nothing to do when none are declared
    license_files = {str(name) for name in metadata.get_all('License-File', ())}
    if license_files:
        yield from gather_license_texts(dist, license_files, logger)
0 commit comments