Skip to content

Commit 6c6b25a

Browse files
authored
Merge pull request #2681 from nexB/1364-license-referenced-filenanames
Improve license referenced_filenames handling #1364
2 parents e912830 + cdd4c0e commit 6c6b25a

File tree

5 files changed

+1084
-53
lines changed

5 files changed

+1084
-53
lines changed

src/licensedcode/plugin_license.py

Lines changed: 79 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -11,16 +11,38 @@
1111

1212
import attr
1313

14-
from commoncode import fileutils
15-
from commoncode.cliutils import PluggableCommandLineOption
1614
from plugincode.scan import ScanPlugin
1715
from plugincode.scan import scan_impl
1816
from commoncode.cliutils import MISC_GROUP
17+
from commoncode.cliutils import PluggableCommandLineOption
1918
from commoncode.cliutils import SCAN_OPTIONS_GROUP
2019
from commoncode.cliutils import SCAN_GROUP
20+
from commoncode.fileutils import file_name
2121
from scancode.api import SCANCODE_LICENSEDB_URL
2222

2323

24+
# Tracing is a developer-only aid: keep it disabled by default so that regular
# scans do not emit debug output for every modified resource.
TRACE = False


def logger_debug(*args):
    """Do nothing: placeholder replaced by a real tracer when TRACE is enabled."""


if TRACE:
    # Flip to False to route trace messages through the ``logging`` module
    # instead of plain ``print``.
    use_print = True
    if use_print:
        prn = print
    else:
        import logging
        import sys
        logger = logging.getLogger(__name__)
        # logging.basicConfig(level=logging.DEBUG, stream=sys.stdout)
        logging.basicConfig(stream=sys.stdout)
        logger.setLevel(logging.DEBUG)
        prn = logger.debug

    def logger_debug(*args):
        """
        Emit a single trace message made of ``args`` joined with spaces.
        Strings are used as-is; any other object is rendered with ``repr``.
        """
        return prn(' '.join(a if isinstance(a, str) else repr(a) for a in args))
44+
45+
2446
def reindex_licenses(ctx, param, value):
2547
if not value or ctx.resilient_parsing:
2648
return
@@ -119,71 +141,85 @@ def get_scanner(
119141
)
120142

121143
def process_codebase(self, codebase, **kwargs):
    """
    Walk the ``codebase`` and, for each resource, add the license matches of
    the files referenced by its own license matches using
    ``add_referenced_filenames_license_matches``.
    Do nothing if the codebase has a single resource.
    """
    if codebase.has_single_resource:
        return

    for resource in codebase.walk():
        if TRACE:
            # snapshot taken before the update to trace a before/after diff
            license_expressions_before = list(resource.license_expressions)

        modified = add_referenced_filenames_license_matches(resource, codebase)

        if TRACE and modified:
            license_expressions_after = list(resource.license_expressions)
            logger_debug(
                'add_referenced_filenames_license_matches: Modified:',
                f'{resource} with license_expressions:\n'
                f'before: {license_expressions_before}\n'
                f'after : {license_expressions_after}'
            )
160+
161+
162+
def add_referenced_filenames_license_matches(resource, codebase):
    """
    Return an updated ``resource`` saving it in place, after adding new license
    matches (licenses and license_expressions) following their Rule
    ``referenced_filenames`` if any. Return None if ``resource`` is not a file
    Resource or was not updated.

    The ``licenses`` and ``license_expressions`` lists of ``resource`` are
    extended in place with the ones of each referenced Resource found in the
    ``codebase``.
    """
    if not resource.is_file:
        return

    license_matches = resource.licenses
    if not license_matches:
        return

    license_expressions = resource.license_expressions

    modified = False

    # The referenced filenames are collected once, before the loop: extending
    # ``license_matches`` below does not change the filenames being followed.
    for referenced_filename in get_referenced_filenames(license_matches):
        referenced_resource = find_referenced_resource(
            referenced_filename=referenced_filename,
            resource=resource,
            codebase=codebase,
        )

        if not referenced_resource or not referenced_resource.licenses:
            continue

        # A license text may reference its own file name: following such a
        # reference would only duplicate the matches of ``resource`` itself.
        if referenced_resource.path == resource.path:
            continue

        modified = True
        # TODO: we should hint that these matches were dereferenced from
        # following a referenced filename
        license_matches.extend(referenced_resource.licenses)
        license_expressions.extend(referenced_resource.license_expressions)

    if modified:
        codebase.save_resource(resource)
        return resource
162197

163198

164199
def get_referenced_filenames(license_matches):
    """
    Return a list of unique referenced filenames found in the rules of a list of
    ``license_matches``.

    Each license match is a mapping with a ``matched_rule`` mapping that has a
    ``referenced_filenames`` list. Filenames are returned in the order of their
    first occurrence.
    """
    filenames = (
        filename
        for license_match in license_matches
        for filename in license_match['matched_rule']['referenced_filenames']
    )
    # dict keys are unique and keep their insertion order: this removes
    # duplicates in a single pass without repeated list scans.
    return list(dict.fromkeys(filenames))
176211

177212

178213
def find_referenced_resource(referenced_filename, resource, codebase, **kwargs):
    """
    Return a Resource matching the ``referenced_filename`` path or filename
    given a ``resource`` in ``codebase``. Return None if the
    ``referenced_filename`` cannot be found in the same directory as the base
    ``resource``. ``referenced_filename`` is the path or filename referenced in
    a LicenseMatch of ``resource``.
    """
    parent = resource.parent(codebase)
    if not parent:
        # no parent directory to search in: nothing can be found
        return

    # this can be a path: only its file name segment is compared
    ref_filename = file_name(referenced_filename)
    for child in parent.children(codebase):
        if child.name == ref_filename:
            return child
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
that is licensed under [MIT](http://opensource.org/licenses/MIT).
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
license: apache-2.0

0 commit comments

Comments
 (0)