|
23 | 23 |
|
24 | 24 | from base64 import b64encode |
25 | 25 | from os.path import join |
26 | | -from typing import TYPE_CHECKING, Generator, Set, Union |
| 26 | +from typing import TYPE_CHECKING, Generator |
27 | 27 |
|
28 | 28 | from cyclonedx.factory.license import LicenseFactory |
29 | 29 | from cyclonedx.model import AttachedText, Encoding |
30 | 30 | from cyclonedx.model.license import DisjunctiveLicense, LicenseAcknowledgement |
31 | 31 |
|
32 | | -from .io import io2str |
| 32 | +from .bytes import bytes2str |
33 | 33 | from .mimetypes import guess_type |
34 | 34 |
|
35 | 35 | if TYPE_CHECKING: # pragma: no cover |
|
38 | 38 |
|
39 | 39 | from cyclonedx.model.license import License |
40 | 40 |
|
41 | | - |
42 | | -def _try_load(dist: 'Distribution', metadir: str, filename: str) -> Union[str, None]: |
43 | | - # Might raise NotImplementedError in theory |
44 | | - # but nothing we can do in that case. |
45 | | - try: |
46 | | - candidate = dist.locate_file(join(metadir, filename)) |
47 | | - except NotImplementedError: |
48 | | - return None |
49 | | - |
50 | | - if not candidate: |
51 | | - return None |
52 | | - |
53 | | - try: |
54 | | - with open(str(candidate), 'rb') as fin: |
55 | | - return io2str(fin) |
56 | | - except FileNotFoundError: |
57 | | - pass |
58 | | - return None |
59 | | - |
60 | | - |
61 | | -def handle_bad_license_file_encoding( |
62 | | - dist: 'Distribution', |
63 | | - lfile: str, |
64 | | - logger: 'Logger' |
65 | | -) -> Union[str, None]: |
66 | | - # Distribution has no method to find the actual metadata dir, |
67 | | - # e.g. dist-info or egg-info. |
68 | | - # So we mimic the logic in PathDistribution and check both subdirs |
69 | | - content: Union[str, None] = None |
70 | | - for metadir in ('.dist-info', '.egg-info'): |
71 | | - content = _try_load(dist, metadir, lfile) |
72 | | - if content: |
73 | | - break |
74 | | - |
75 | | - if content is None: |
76 | | - logger.debug('Error: license file %r for dist %r is not UTF-8 encoded', |
77 | | - lfile, dist.metadata['Name']) |
78 | | - return content |
79 | | - |
80 | | - |
81 | | -def gather_license_texts( |
82 | | - dist: 'Distribution', |
83 | | - lfiles: Set[str], |
84 | | - logger: 'Logger' |
85 | | -) -> Generator['License', None, None]: |
86 | | - lack = LicenseAcknowledgement.DECLARED |
87 | | - for mlfile in lfiles: |
88 | | - # see spec: https://peps.python.org/pep-0639/#add-license-file-field |
89 | | - # latest spec rev: https://discuss.python.org/t/pep-639-round-3-improving-license-clarity-with-better-package-metadata/53020 # noqa: E501 |
90 | | - |
91 | | - # per spec > license files are stored in the `.dist-info/licenses/` subdirectory of the produced wheel. |
92 | | - # but in practice, other locations are used, too. |
93 | | - # loop over the candidate location and pick the first one found. |
94 | | - content = None |
95 | | - for loc in ('licenses', 'license_files', '.'): |
96 | | - path = join(loc, mlfile) |
97 | | - try: |
98 | | - content = dist.read_text(path) |
99 | | - except UnicodeDecodeError: |
100 | | - # Malformed, try harder |
101 | | - content = handle_bad_license_file_encoding(dist, path, logger) |
102 | | - |
103 | | - if content is not None: |
104 | | - break |
105 | | - else: |
106 | | - logger.debug('Error: failed to read license file %r for dist %r', |
107 | | - mlfile, dist.metadata['Name']) |
108 | | - continue |
109 | | - |
110 | | - encoding = None |
111 | | - content_type = guess_type(mlfile) or AttachedText.DEFAULT_CONTENT_TYPE |
112 | | - # per default, license files are human-readable texts. |
113 | | - if not content_type.startswith('text/'): |
114 | | - encoding = Encoding.BASE_64 |
115 | | - content = b64encode(content.encode('utf-8')).decode('ascii') |
116 | | - yield DisjunctiveLicense( |
117 | | - name=f'declared license file: {mlfile}', |
118 | | - acknowledgement=lack, |
119 | | - text=AttachedText( |
120 | | - content=content, |
121 | | - encoding=encoding, |
122 | | - content_type=content_type |
123 | | - )) |
| 41 | +# per spec > license files are stored in the `.dist-info/licenses/` subdirectory of the produced wheel. |
| 42 | +# but in practice, other locations are used, too. |
| 43 | +_LICENSE_LOCATIONS = ('licenses', 'license_files', '') |
124 | 44 |
|
125 | 45 |
|
126 | 46 | def dist2licenses( |
127 | 47 | dist: 'Distribution', |
128 | 48 | gather_text: bool, |
129 | 49 | logger: 'Logger' |
130 | 50 | ) -> Generator['License', None, None]: |
| 51 | + lfac = LicenseFactory() |
| 52 | + lack = LicenseAcknowledgement.DECLARED |
131 | 53 | metadata = dist.metadata # see https://packaging.python.org/en/latest/specifications/core-metadata/ |
132 | 54 | if (lexp := metadata['License-Expression']) is not None: |
133 | | - lfac = LicenseFactory() |
134 | | - lack = LicenseAcknowledgement.DECLARED |
135 | 55 | # see spec: https://peps.python.org/pep-0639/#add-license-expression-field |
136 | 56 | yield lfac.make_from_string(lexp, |
137 | 57 | license_acknowledgement=lack) |
138 | | - if gather_text and (lfiles := set(fn for fn in metadata.get_all('License-File', ()))): |
139 | | - yield from gather_license_texts(dist, lfiles, logger) |
| 58 | + if gather_text: |
| 59 | + for mlfile in set(metadata.get_all('License-File', ())): |
| 60 | + # see spec: https://peps.python.org/pep-0639/#add-license-file-field |
| 61 | + # latest spec rev: https://discuss.python.org/t/pep-639-round-3-improving-license-clarity-with-better-package-metadata/53020 # noqa: E501 |
| 62 | + content = None |
| 63 | + for mlpath in _LICENSE_LOCATIONS: |
| 64 | + try: |
| 65 | + content = dist.read_text(join(mlpath, mlfile)) |
| 66 | + except UnicodeDecodeError as err: |
| 67 | + try: |
| 68 | + content = bytes2str(err.object) |
| 69 | + except UnicodeDecodeError: |
| 70 | + pass |
| 71 | + else: |
| 72 | + break # for-loop |
| 73 | + else: |
| 74 | + if content is not None: |
| 75 | + break # for-loop |
| 76 | + if content is None: # pragma: no cover |
| 77 | + logger.debug('Error: failed to read license file %r for dist %r', |
| 78 | + mlfile, metadata['Name']) |
| 79 | + continue |
| 80 | + encoding = None |
| 81 | + content_type = guess_type(mlfile) or AttachedText.DEFAULT_CONTENT_TYPE |
| 82 | + # per default, license files are human-readable texts. |
| 83 | + if not content_type.startswith('text/'): |
| 84 | + encoding = Encoding.BASE_64 |
| 85 | + content = b64encode(content.encode('utf-8')).decode('ascii') |
| 86 | + yield DisjunctiveLicense( |
| 87 | + name=f'declared license file: {mlfile}', |
| 88 | + acknowledgement=lack, |
| 89 | + text=AttachedText( |
| 90 | + content=content, |
| 91 | + encoding=encoding, |
| 92 | + content_type=content_type |
| 93 | + )) |
0 commit comments