|
11 | 11 |
|
12 | 12 | import attr |
13 | 13 |
|
14 | | -from commoncode import fileutils |
15 | | -from commoncode.cliutils import PluggableCommandLineOption |
16 | 14 | from plugincode.scan import ScanPlugin |
17 | 15 | from plugincode.scan import scan_impl |
18 | 16 | from commoncode.cliutils import MISC_GROUP |
| 17 | +from commoncode.cliutils import PluggableCommandLineOption |
19 | 18 | from commoncode.cliutils import SCAN_OPTIONS_GROUP |
20 | 19 | from commoncode.cliutils import SCAN_GROUP |
| 20 | +from commoncode.fileutils import file_name |
21 | 21 | from scancode.api import SCANCODE_LICENSEDB_URL |
22 | 22 |
|
23 | 23 |
|
| 24 | +TRACE = True |
| 25 | + |
| 26 | +def logger_debug(*args): pass |
| 27 | + |
| 28 | + |
| 29 | +if TRACE: |
| 30 | + use_print = True |
| 31 | + if use_print: |
| 32 | + prn = print |
| 33 | + else: |
| 34 | + import logging |
| 35 | + import sys |
| 36 | + logger = logging.getLogger(__name__) |
| 37 | + # logging.basicConfig(level=logging.DEBUG, stream=sys.stdout) |
| 38 | + logging.basicConfig(stream=sys.stdout) |
| 39 | + logger.setLevel(logging.DEBUG) |
| 40 | + prn = logger.debug |
| 41 | + |
| 42 | + def logger_debug(*args): |
| 43 | + return prn(' '.join(isinstance(a, str) and a or repr(a) for a in args)) |
| 44 | + |
| 45 | + |
24 | 46 | def reindex_licenses(ctx, param, value): |
25 | 47 | if not value or ctx.resilient_parsing: |
26 | 48 | return |
@@ -119,71 +141,85 @@ def get_scanner( |
119 | 141 | ) |
120 | 142 |
|
121 | 143 | def process_codebase(self, codebase, **kwargs): |
122 | | - |
| 144 | + |
123 | 145 | if codebase.has_single_resource: |
124 | 146 | return |
125 | 147 |
|
126 | | - for resource in codebase.walk(topdown=False): |
127 | | - match_reference_license(resource,codebase) |
128 | | - |
129 | | - |
130 | | -def match_reference_license(resource, codebase): |
| 148 | + for resource in codebase.walk(): |
| 149 | + if TRACE: |
| 150 | + license_expressions_before = list(resource.license_expressions) |
| 151 | + modified = add_referenced_filenames_license_matches(resource, codebase) |
| 152 | + if TRACE and modified: |
| 153 | + license_expressions_after = list(resource.license_expressions) |
| 154 | + logger_debug( |
| 155 | + f'add_referenced_filenames_license_matches: Modfied:', |
| 156 | + f'{resource} with license_expressions:\n' |
| 157 | + f'before: {license_expressions_before}\n' |
| 158 | + f'after : {license_expressions_after}' |
| 159 | + ) |
| 160 | + |
| 161 | + |
| 162 | +def add_referenced_filenames_license_matches(resource, codebase): |
131 | 163 | """ |
132 | | - Return the ``resource`` Resource updating and saving it in place, after adding new |
133 | | - license matches (licenses and license_expressions) following their Rule |
134 | | - ``referenced_filenames`` if any. Return None if this is not a file Resource. |
| 164 | + Return an updated ``resource`` saving it in place, after adding new license |
| 165 | + matches (licenses and license_expressions) following their Rule |
| 166 | + ``referenced_filenames`` if any. Return None if ``resource`` is not a file |
| 167 | + Resource or was not updated. |
135 | 168 | """ |
136 | 169 | if not resource.is_file: |
137 | 170 | return |
138 | 171 |
|
139 | | - licenses = resource.licenses |
| 172 | + license_matches = resource.licenses |
| 173 | + if not license_matches: |
| 174 | + return |
| 175 | + |
140 | 176 | license_expressions = resource.license_expressions |
141 | | - if not licenses: |
142 | | - return |
143 | 177 |
|
144 | | - referenced_licenses = [] |
145 | | - referenced_license_expressions = [] |
146 | | - referenced_filenames = get_referenced_filenames(licenses) |
147 | 178 | modified = False |
148 | | - |
149 | | - for referenced_filename in referenced_filenames: |
150 | | - new_resource = find_referenced_resource(referenced_filename=referenced_filename, resource=resource, codebase=codebase) |
151 | | - if new_resource: |
| 179 | + |
| 180 | + for referenced_filename in get_referenced_filenames(license_matches): |
| 181 | + referenced_resource = find_referenced_resource( |
| 182 | + referenced_filename=referenced_filename, |
| 183 | + resource=resource, |
| 184 | + codebase=codebase, |
| 185 | + ) |
| 186 | + |
| 187 | + if referenced_resource and referenced_resource.licenses: |
152 | 188 | modified = True |
153 | | - referenced_licenses.extend(new_resource.licenses) |
154 | | - referenced_license_expressions.extend(new_resource.license_expressions) |
| 189 | + # TODO: we should hint that these matches were dereferenced from |
| 190 | + # following a referenced filename |
| 191 | + license_matches.extend(referenced_resource.licenses) |
| 192 | + license_expressions.extend(referenced_resource.license_expressions) |
155 | 193 |
|
156 | | - licenses.extend(referenced_licenses) |
157 | | - license_expressions.extend(referenced_license_expressions) |
158 | | - |
159 | 194 | if modified: |
160 | 195 | codebase.save_resource(resource) |
161 | | - return resource |
| 196 | + return resource |
162 | 197 |
|
163 | 198 |
|
164 | 199 | def get_referenced_filenames(license_matches): |
165 | 200 | """ |
166 | | - Return a list of unique referenced filenames found in the rules of a list of ``license_matches`` |
| 201 | + Return a list of unique referenced filenames found in the rules of a list of |
| 202 | + ``license_matches``. |
167 | 203 | """ |
168 | | - referenced_filenames = [] |
| 204 | + unique_filenames = [] |
169 | 205 | for license_match in license_matches: |
170 | | - referenced_files = license_match['matched_rule']['referenced_filenames'] |
171 | | - for referenced_filename in referenced_files: |
172 | | - if not referenced_filename in referenced_filenames: |
173 | | - referenced_filenames.append(referenced_filename) |
174 | | - |
175 | | - return referenced_filenames |
| 206 | + for filename in license_match['matched_rule']['referenced_filenames']: |
| 207 | + if filename not in unique_filenames: |
| 208 | + unique_filenames.append(filename) |
| 209 | + |
| 210 | + return unique_filenames |
176 | 211 |
|
177 | 212 |
|
178 | 213 | def find_referenced_resource(referenced_filename, resource, codebase, **kwargs): |
179 | 214 | """ |
180 | | - Return a Resource matching the ``referenced_filename`` path or filename given a ``resource`` in ``codebase``. |
181 | | - Return None if the ``referenced_filename`` cannot be found in the same directory as the base ``resource``. |
182 | | - ``referenced_filename`` is the path or filename referenced in a LicenseMatch of ``resource``, |
| 215 | + Return a Resource matching the ``referenced_filename`` path or filename |
| 216 | + given a ``resource`` in ``codebase``. Return None if the |
| 217 | + ``referenced_filename`` cannot be found in the same directory as the base |
| 218 | + ``resource``. ``referenced_filename`` is the path or filename referenced in |
| 219 | + a LicenseMatch of ``resource``. |
183 | 220 | """ |
184 | | - parent = resource.parent(codebase) |
185 | | - |
186 | | - for child in parent.children(codebase): |
187 | | - path = child.path |
188 | | - if path.endswith(referenced_filename) or fileutils.file_base_name(child.path) == referenced_filename: |
| 221 | + # this can be a path |
| 222 | + ref_filename = file_name(referenced_filename) |
| 223 | + for child in resource.parent(codebase).children(codebase): |
| 224 | + if child.name == ref_filename: |
189 | 225 | return child |
0 commit comments