Skip to content

Commit cdd4c0e

Browse files
committed
Do not match referenced_filenames path suffixes #1364
Only follow license references that match an exact filename. In #2616 we introduced matching the path of referenced_filenames based on a matching filename or path suffix. This removes path suffix matching, which is problematic. Before this we were using .endswith(path), and this led to weird and incorrect license dereferences. Signed-off-by: Philippe Ombredanne <[email protected]>
1 parent e912830 commit cdd4c0e

File tree

5 files changed

+1084
-53
lines changed

5 files changed

+1084
-53
lines changed

src/licensedcode/plugin_license.py

Lines changed: 79 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -11,16 +11,38 @@
1111

1212
import attr
1313

14-
from commoncode import fileutils
15-
from commoncode.cliutils import PluggableCommandLineOption
1614
from plugincode.scan import ScanPlugin
1715
from plugincode.scan import scan_impl
1816
from commoncode.cliutils import MISC_GROUP
17+
from commoncode.cliutils import PluggableCommandLineOption
1918
from commoncode.cliutils import SCAN_OPTIONS_GROUP
2019
from commoncode.cliutils import SCAN_GROUP
20+
from commoncode.fileutils import file_name
2121
from scancode.api import SCANCODE_LICENSEDB_URL
2222

2323

24+
TRACE = True
25+
26+
def logger_debug(*args): pass
27+
28+
29+
if TRACE:
30+
use_print = True
31+
if use_print:
32+
prn = print
33+
else:
34+
import logging
35+
import sys
36+
logger = logging.getLogger(__name__)
37+
# logging.basicConfig(level=logging.DEBUG, stream=sys.stdout)
38+
logging.basicConfig(stream=sys.stdout)
39+
logger.setLevel(logging.DEBUG)
40+
prn = logger.debug
41+
42+
def logger_debug(*args):
43+
return prn(' '.join(isinstance(a, str) and a or repr(a) for a in args))
44+
45+
2446
def reindex_licenses(ctx, param, value):
2547
if not value or ctx.resilient_parsing:
2648
return
@@ -119,71 +141,85 @@ def get_scanner(
119141
)
120142

121143
def process_codebase(self, codebase, **kwargs):
122-
144+
123145
if codebase.has_single_resource:
124146
return
125147

126-
for resource in codebase.walk(topdown=False):
127-
match_reference_license(resource,codebase)
128-
129-
130-
def match_reference_license(resource, codebase):
148+
for resource in codebase.walk():
149+
if TRACE:
150+
license_expressions_before = list(resource.license_expressions)
151+
modified = add_referenced_filenames_license_matches(resource, codebase)
152+
if TRACE and modified:
153+
license_expressions_after = list(resource.license_expressions)
154+
logger_debug(
155+
f'add_referenced_filenames_license_matches: Modfied:',
156+
f'{resource} with license_expressions:\n'
157+
f'before: {license_expressions_before}\n'
158+
f'after : {license_expressions_after}'
159+
)
160+
161+
162+
def add_referenced_filenames_license_matches(resource, codebase):
131163
"""
132-
Return the ``resource`` Resource updating and saving it in place, after adding new
133-
license matches (licenses and license_expressions) following their Rule
134-
``referenced_filenames`` if any. Return None if this is not a file Resource.
164+
Return an updated ``resource`` saving it in place, after adding new license
165+
matches (licenses and license_expressions) following their Rule
166+
``referenced_filenames`` if any. Return None if ``resource`` is not a file
167+
Resource or was not updated.
135168
"""
136169
if not resource.is_file:
137170
return
138171

139-
licenses = resource.licenses
172+
license_matches = resource.licenses
173+
if not license_matches:
174+
return
175+
140176
license_expressions = resource.license_expressions
141-
if not licenses:
142-
return
143177

144-
referenced_licenses = []
145-
referenced_license_expressions = []
146-
referenced_filenames = get_referenced_filenames(licenses)
147178
modified = False
148-
149-
for referenced_filename in referenced_filenames:
150-
new_resource = find_referenced_resource(referenced_filename=referenced_filename, resource=resource, codebase=codebase)
151-
if new_resource:
179+
180+
for referenced_filename in get_referenced_filenames(license_matches):
181+
referenced_resource = find_referenced_resource(
182+
referenced_filename=referenced_filename,
183+
resource=resource,
184+
codebase=codebase,
185+
)
186+
187+
if referenced_resource and referenced_resource.licenses:
152188
modified = True
153-
referenced_licenses.extend(new_resource.licenses)
154-
referenced_license_expressions.extend(new_resource.license_expressions)
189+
# TODO: we should hint that these matches were dereferenced from
190+
# following a referenced filename
191+
license_matches.extend(referenced_resource.licenses)
192+
license_expressions.extend(referenced_resource.license_expressions)
155193

156-
licenses.extend(referenced_licenses)
157-
license_expressions.extend(referenced_license_expressions)
158-
159194
if modified:
160195
codebase.save_resource(resource)
161-
return resource
196+
return resource
162197

163198

164199
def get_referenced_filenames(license_matches):
165200
"""
166-
Return a list of unique referenced filenames found in the rules of a list of ``license_matches``
201+
Return a list of unique referenced filenames found in the rules of a list of
202+
``license_matches``
167203
"""
168-
referenced_filenames = []
204+
unique_filenames = []
169205
for license_match in license_matches:
170-
referenced_files = license_match['matched_rule']['referenced_filenames']
171-
for referenced_filename in referenced_files:
172-
if not referenced_filename in referenced_filenames:
173-
referenced_filenames.append(referenced_filename)
174-
175-
return referenced_filenames
206+
for filename in license_match['matched_rule']['referenced_filenames']:
207+
if filename not in unique_filenames:
208+
unique_filenames.append(filename)
209+
210+
return unique_filenames
176211

177212

178213
def find_referenced_resource(referenced_filename, resource, codebase, **kwargs):
179214
"""
180-
Return a Resource matching the ``referenced_filename`` path or filename given a ``resource`` in ``codebase``.
181-
Return None if the ``referenced_filename`` cannot be found in the same directory as the base ``resource``.
182-
``referenced_filename`` is the path or filename referenced in a LicenseMatch of ``resource``,
215+
Return a Resource matching the ``referenced_filename`` path or filename
216+
given a ``resource`` in ``codebase``. Return None if the
217+
``referenced_filename`` cannot be found in the same directory as the base
218+
``resource``. ``referenced_filename`` is the path or filename referenced in
219+
a LicenseMatch of ``resource``.
183220
"""
184-
parent = resource.parent(codebase)
185-
186-
for child in parent.children(codebase):
187-
path = child.path
188-
if path.endswith(referenced_filename) or fileutils.file_base_name(child.path) == referenced_filename:
221+
# this can be a path
222+
ref_filename = file_name(referenced_filename)
223+
for child in resource.parent(codebase).children(codebase):
224+
if child.name == ref_filename:
189225
return child
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
that is licensed under [MIT](http://opensource.org/licenses/MIT).
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
license: apache-2.0

0 commit comments

Comments
 (0)