Skip to content

Commit 3f78833

Browse files
Add from-file attribute to matches with origin
Adds a new `from_file` attribute that documents the origin path of each match, so that for de-referenced matches we can determine the location they came from, and so that we can differentiate between matches that come from the same file and matches that come from a different file. Signed-off-by: Ayan Sinha Mahapatra <[email protected]>
1 parent 8de1e90 commit 3f78833

File tree

789 files changed

+4833
-48
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

789 files changed

+4833
-48
lines changed

src/licensedcode/detection.py

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -617,6 +617,7 @@ def from_dict(cls, license_match_mapping):
617617
matched_text = license_match_mapping.get("matched_text") or None
618618

619619
return cls(
620+
from_file=license_match_mapping["from_file"],
620621
start_line=license_match_mapping["start_line"],
621622
end_line=license_match_mapping["end_line"],
622623
match_score=license_match_mapping["score"],
@@ -654,6 +655,7 @@ def to_dict(
654655

655656
# Detection Level Information
656657
result['score'] = self.score()
658+
result['from_file'] = self.from_file
657659
result['start_line'] = self.start_line
658660
result['end_line'] = self.end_line
659661
result['matched_length'] = self.len()
@@ -671,6 +673,21 @@ def to_dict(
671673
return result
672674

673675

676+
def populate_matches_with_path(matches, path):
    """
    Given a `matches` list of license match mappings, populate the
    `from_file` key of each mapping with `path`, which is the path of the
    origin file for that license match, unless it is already set.

    The mappings are updated in place and nothing is returned. A mapping
    that has no `from_file` key at all is treated like one with an empty
    value instead of raising a KeyError.
    """
    for match in matches:
        # A match whose `from_file` is already populated was brought over
        # from another file, so its origin is kept as-is. An empty or missing
        # value means the match is native to the original resource, so the
        # resource path is recorded as its origin.
        if not match.get("from_file"):
            match["from_file"] = path
689+
690+
674691
def collect_license_detections(codebase, include_license_clues=True):
675692
"""
676693
Return a list of LicenseDetectionFromResult object rehydrated from
@@ -680,7 +697,10 @@ def collect_license_detections(codebase, include_license_clues=True):
680697
according to their license detections. This is required because package fields
681698
are populated in package plugin, which runs before the license plugin, and thus
682699
the license plugin step where unknown references to other files are dereferenced
683-
does not show up automatically in package attributes.
700+
does not show up automatically in package attributes.
701+
702+
Also populate from_file attributes with resource paths for matches which have
703+
origin in the same file.
684704
"""
685705
has_packages = hasattr(codebase.root, 'package_data')
686706
has_licenses = hasattr(codebase.root, 'license_detections')
@@ -692,7 +712,11 @@ def collect_license_detections(codebase, include_license_clues=True):
692712
resource_license_detections = []
693713
if has_licenses:
694714
license_detections = getattr(resource, 'license_detections', []) or []
715+
for detection in license_detections:
716+
populate_matches_with_path(matches=detection["matches"], path=resource.path)
695717
license_clues = getattr(resource, 'license_clues', []) or []
718+
populate_matches_with_path(matches=license_clues, path=resource.path)
719+
codebase.save_resource(resource)
696720

697721
if license_detections:
698722
license_detection_objects = detections_from_license_detection_mappings(
@@ -729,6 +753,9 @@ def collect_license_detections(codebase, include_license_clues=True):
729753

730754
package_license_detections = package["license_detections"]
731755
if package_license_detections:
756+
for detection in package_license_detections:
757+
populate_matches_with_path(matches=detection["matches"], path=resource.path)
758+
modified = True
732759
package_license_detection_mappings.extend(package_license_detections)
733760
detection_is_same, license_expression = verify_package_license_expression(
734761
license_detection_mappings=package_license_detections,

src/licensedcode/match.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,17 @@ class LicenseMatch(object):
223223
metadata=dict(help='match end line, 1-based')
224224
)
225225

226+
from_file = attr.ib(
227+
default=None,
228+
metadata=dict(
229+
help='File path where this LicenseMatch was originally detected. '
230+
'This needs to be stored as we bring over LicenseMatches from '
231+
'other files into LicenseDetection objects now, and we need '
232+
'to track the origin for these to be able to determine easily '
233+
'which are native to that file.'
234+
)
235+
)
236+
226237
query = attr.ib(
227238
default=None,
228239
metadata=dict(help='Query object for this match')
@@ -722,7 +733,7 @@ def matched_text(
722733
highlight=True,
723734
highlight_matched='{}',
724735
highlight_not_matched='[{}]',
725-
_usecache=True
736+
_usecache=True,
726737
):
727738
"""
728739
Return the matched text for this match or an empty string if no query
@@ -763,6 +774,7 @@ def to_dict(
763774
include_text=False,
764775
license_text_diagnostics=False,
765776
whole_lines=True,
777+
file_path=None,
766778
):
767779
"""
768780
Return a "result" scan data built from a LicenseMatch object.
@@ -783,6 +795,7 @@ def to_dict(
783795
result['score'] = self.score()
784796
result['start_line'] = self.start_line
785797
result['end_line'] = self.end_line
798+
result['from_file'] = file_path
786799
result['matched_length'] = self.len()
787800
result['match_coverage'] = self.coverage()
788801
result['matcher'] = self.matcher

src/licensedcode/plugin_license.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
from licensedcode.cache import build_spdx_license_expression, get_cache
2222
from licensedcode.detection import collect_license_detections
23+
from licensedcode.detection import populate_matches_with_path
2324
from licensedcode.detection import find_referenced_resource
2425
from licensedcode.detection import get_detected_license_expression
2526
from licensedcode.detection import get_matches_from_detection_mappings
@@ -279,11 +280,14 @@ def add_referenced_filenames_license_matches_for_detections(resource, codebase):
279280
modified = True
280281
detection_modified = True
281282
detections_added.extend(referenced_resource.license_detections)
282-
license_match_mappings.extend(
283-
get_matches_from_detection_mappings(
284-
license_detections=referenced_resource.license_detections
285-
)
283+
matches_to_extend = get_matches_from_detection_mappings(
284+
license_detections=referenced_resource.license_detections
286285
)
286+
populate_matches_with_path(
287+
matches=matches_to_extend,
288+
path=referenced_resource.path
289+
)
290+
license_match_mappings.extend(matches_to_extend)
287291

288292
if not detection_modified:
289293
continue

src/packagedcode/licensing.py

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
from licensedcode.detection import find_referenced_resource
2626
from licensedcode.detection import detect_licenses
2727
from licensedcode.detection import LicenseDetectionFromResult
28+
from licensedcode.detection import populate_matches_with_path
2829
from licensedcode.spans import Span
2930
from licensedcode import query
3031

@@ -113,11 +114,16 @@ def add_referenced_license_matches_for_package(resource, codebase):
113114
if referenced_license_detections:
114115
modified = True
115116
detection_modified = True
116-
license_match_mappings.extend(
117-
get_matches_from_detection_mappings(
118-
license_detections=referenced_license_detections
119-
)
117+
matches_to_extend = get_matches_from_detection_mappings(
118+
license_detections=referenced_license_detections
120119
)
120+
# For LicenseMatches with different resources as origin, add the
121+
# resource path to these matches as origin info
122+
populate_matches_with_path(
123+
matches=matches_to_extend,
124+
path=referenced_resource.path
125+
)
126+
license_match_mappings.extend(matches_to_extend)
121127

122128
if not detection_modified:
123129
continue
@@ -231,6 +237,10 @@ def add_referenced_license_detection_from_package(resource, codebase):
231237
for pkg_detection in pkg_detections:
232238
modified = True
233239
detection_modified = True
240+
populate_matches_with_path(
241+
matches=pkg_detection["matches"],
242+
path=resource.path
243+
)
234244
license_match_mappings.extend(pkg_detection["matches"])
235245
detections_added.append(pkg_detection)
236246
analysis = DetectionCategory.UNKNOWN_REFERENCE_IN_FILE_TO_PACKAGE.value
@@ -347,6 +357,11 @@ def get_license_detections_from_sibling_file(resource, codebase):
347357

348358
license_detections = []
349359
for sibling in siblings:
360+
for detection in sibling.license_detections:
361+
populate_matches_with_path(
362+
matches=detection["matches"],
363+
path=sibling.path
364+
)
350365
license_detections.extend(sibling.license_detections)
351366

352367
if not license_detections:

tests/cluecode/data/plugin_filter_clues/filtered-expected.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
"score": 96.07,
3737
"start_line": 7,
3838
"end_line": 70,
39+
"from_file": "LICENSE",
3940
"matched_length": 367,
4041
"match_coverage": 100.0,
4142
"matcher": "3-seq",

tests/cluecode/data/plugin_filter_clues/filtered-expected2.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
"score": 100.0,
3737
"start_line": 7,
3838
"end_line": 22,
39+
"from_file": "LICENSE2",
3940
"matched_length": 145,
4041
"match_coverage": 100.0,
4142
"matcher": "2-aho",

tests/cluecode/data/plugin_filter_clues/filtered-expected3.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
"score": 100.0,
3737
"start_line": 1,
3838
"end_line": 47,
39+
"from_file": "LICENSE3",
3940
"matched_length": 303,
4041
"match_coverage": 100.0,
4142
"matcher": "1-hash",

tests/formattedcode/data/common/manifests-expected.json

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
"score": 100.0,
3535
"start_line": 1,
3636
"end_line": 1,
37+
"from_file": null,
3738
"matched_length": 8,
3839
"match_coverage": 100.0,
3940
"matcher": "2-aho",
@@ -47,6 +48,7 @@
4748
"score": 100.0,
4849
"start_line": 2,
4950
"end_line": 2,
51+
"from_file": null,
5052
"matched_length": 7,
5153
"match_coverage": 100.0,
5254
"matcher": "2-aho",
@@ -125,6 +127,7 @@
125127
"score": 100.0,
126128
"start_line": 1,
127129
"end_line": 1,
130+
"from_file": null,
128131
"matched_length": 3,
129132
"match_coverage": 100.0,
130133
"matcher": "1-hash",
@@ -144,6 +147,7 @@
144147
"score": 100.0,
145148
"start_line": 1,
146149
"end_line": 1,
150+
"from_file": null,
147151
"matched_length": 3,
148152
"match_coverage": 100.0,
149153
"matcher": "1-hash",
@@ -218,6 +222,7 @@
218222
"score": 100.0,
219223
"start_line": 1,
220224
"end_line": 1,
225+
"from_file": null,
221226
"matched_length": 1,
222227
"match_coverage": 100.0,
223228
"matcher": "1-spdx-id",
@@ -640,6 +645,7 @@
640645
"score": 100.0,
641646
"start_line": 1,
642647
"end_line": 1,
648+
"from_file": "manifests/maven/persistence-api-1.0.pom",
643649
"matched_length": 8,
644650
"match_coverage": 100.0,
645651
"matcher": "2-aho",
@@ -653,6 +659,7 @@
653659
"score": 100.0,
654660
"start_line": 2,
655661
"end_line": 2,
662+
"from_file": "manifests/maven/persistence-api-1.0.pom",
656663
"matched_length": 7,
657664
"match_coverage": 100.0,
658665
"matcher": "2-aho",
@@ -697,6 +704,7 @@
697704
"score": 16.0,
698705
"start_line": 17,
699706
"end_line": 19,
707+
"from_file": "manifests/maven/persistence-api-1.0.pom",
700708
"matched_length": 3,
701709
"match_coverage": 100.0,
702710
"matcher": "2-aho",
@@ -709,6 +717,7 @@
709717
"score": 82.35,
710718
"start_line": 18,
711719
"end_line": 20,
720+
"from_file": "manifests/maven/persistence-api-1.0.pom",
712721
"matched_length": 14,
713722
"match_coverage": 82.35,
714723
"matcher": "3-seq",
@@ -829,6 +838,7 @@
829838
"score": 100.0,
830839
"start_line": 1,
831840
"end_line": 1,
841+
"from_file": "manifests/npm-license-mapping/package.json",
832842
"matched_length": 3,
833843
"match_coverage": 100.0,
834844
"matcher": "1-hash",
@@ -848,6 +858,7 @@
848858
"score": 100.0,
849859
"start_line": 1,
850860
"end_line": 1,
861+
"from_file": "manifests/npm-license-mapping/package.json",
851862
"matched_length": 3,
852863
"match_coverage": 100.0,
853864
"matcher": "1-hash",
@@ -991,6 +1002,7 @@
9911002
"score": 100.0,
9921003
"start_line": 6,
9931004
"end_line": 6,
1005+
"from_file": "manifests/npm-license-mapping/package.json",
9941006
"matched_length": 4,
9951007
"match_coverage": 100.0,
9961008
"matcher": "2-aho",
@@ -1009,6 +1021,7 @@
10091021
"score": 100.0,
10101022
"start_line": 20,
10111023
"end_line": 20,
1024+
"from_file": "manifests/npm-license-mapping/package.json",
10121025
"matched_length": 3,
10131026
"match_coverage": 100.0,
10141027
"matcher": "2-aho",
@@ -1133,6 +1146,7 @@
11331146
"score": 100.0,
11341147
"start_line": 1,
11351148
"end_line": 1,
1149+
"from_file": "manifests/npm-license-string/package.json",
11361150
"matched_length": 1,
11371151
"match_coverage": 100.0,
11381152
"matcher": "1-spdx-id",
@@ -1226,6 +1240,7 @@
12261240
"score": 100.0,
12271241
"start_line": 4,
12281242
"end_line": 4,
1243+
"from_file": "manifests/npm-license-string/package.json",
12291244
"matched_length": 2,
12301245
"match_coverage": 100.0,
12311246
"matcher": "2-aho",
@@ -1354,6 +1369,7 @@
13541369
"score": 100.0,
13551370
"start_line": 1,
13561371
"end_line": 1,
1372+
"from_file": "manifests/pypi/bluepyopt_setup.py",
13571373
"matched_length": 1,
13581374
"match_coverage": 100.0,
13591375
"matcher": "1-hash",
@@ -1373,6 +1389,7 @@
13731389
"score": 100.0,
13741390
"start_line": 1,
13751391
"end_line": 1,
1392+
"from_file": "manifests/pypi/bluepyopt_setup.py",
13761393
"matched_length": 10,
13771394
"match_coverage": 100.0,
13781395
"matcher": "1-hash",
@@ -1524,6 +1541,7 @@
15241541
"score": 100.0,
15251542
"start_line": 9,
15261543
"end_line": 20,
1544+
"from_file": "manifests/pypi/bluepyopt_setup.py",
15271545
"matched_length": 106,
15281546
"match_coverage": 100.0,
15291547
"matcher": "2-aho",
@@ -1542,6 +1560,7 @@
15421560
"score": 100.0,
15431561
"start_line": 65,
15441562
"end_line": 65,
1563+
"from_file": "manifests/pypi/bluepyopt_setup.py",
15451564
"matched_length": 2,
15461565
"match_coverage": 100.0,
15471566
"matcher": "2-aho",
@@ -1560,6 +1579,7 @@
15601579
"score": 100.0,
15611580
"start_line": 74,
15621581
"end_line": 75,
1582+
"from_file": "manifests/pypi/bluepyopt_setup.py",
15631583
"matched_length": 10,
15641584
"match_coverage": 100.0,
15651585
"matcher": "2-aho",

0 commit comments

Comments
 (0)