Skip to content

Commit fb7cedc

Browse files
Filter license intros from dereferenced matches
Signed-off-by: Ayan Sinha Mahapatra <[email protected]>
1 parent 03963fe commit fb7cedc

File tree

2 files changed: 27 additions (+27) and 6 deletions (-6)

2 files changed: 27 additions (+27) and 6 deletions (-6)

src/licensedcode/detection.py

Lines changed: 19 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1256,6 +1256,16 @@ def filter_license_references(license_match_objects):
12561256
return filtered_matches
12571257

12581258

1259+
def filter_license_intros_and_references(license_match_objects):
1260+
"""
1261+
Return a filtered ``license_matches`` list of LicenseMatch objects removing
1262+
matches which had references to local files with licenses and spurious matches
1263+
to license introduction statements.
1264+
"""
1265+
filtered_license_match_objects = filter_license_intros(license_match_objects)
1266+
return filter_license_references(filtered_license_match_objects)
1267+
1268+
12591269
def has_references_to_local_files(license_matches):
12601270
"""
12611271
Return True if any of the matched Rule for the ``license_matches`` has a
@@ -1319,37 +1329,37 @@ def get_detected_license_expression(
13191329
if analysis == DetectionCategory.UNKNOWN_REFERENCE_IN_FILE_TO_PACKAGE.value:
13201330
if TRACE_ANALYSIS:
13211331
logger_debug(f'analysis {DetectionCategory.UNKNOWN_REFERENCE_IN_FILE_TO_PACKAGE.value}')
1322-
matches_for_expression = filter_license_references(license_matches)
1332+
matches_for_expression = filter_license_intros_and_references(license_matches)
13231333
detection_log.append(DetectionRule.UNKNOWN_REFERENCE_IN_FILE_TO_PACKAGE.value)
13241334

13251335
elif analysis == DetectionCategory.UNKNOWN_REFERENCE_IN_FILE_TO_NONEXISTENT_PACKAGE.value:
13261336
if TRACE_ANALYSIS:
13271337
logger_debug(f'analysis {DetectionCategory.UNKNOWN_REFERENCE_IN_FILE_TO_NONEXISTENT_PACKAGE.value}')
1328-
matches_for_expression = filter_license_references(license_matches)
1338+
matches_for_expression = filter_license_intros_and_references(license_matches)
13291339
detection_log.append(DetectionRule.UNKNOWN_REFERENCE_IN_FILE_TO_NONEXISTENT_PACKAGE.value)
13301340

13311341
elif analysis == DetectionCategory.UNKNOWN_FILE_REFERENCE_LOCAL.value:
13321342
if TRACE_ANALYSIS:
13331343
logger_debug(f'analysis {DetectionCategory.UNKNOWN_FILE_REFERENCE_LOCAL.value}')
1334-
matches_for_expression = filter_license_references(license_matches)
1344+
matches_for_expression = filter_license_intros_and_references(license_matches)
13351345
detection_log.append(DetectionRule.UNKNOWN_REFERENCE_TO_LOCAL_FILE.value)
13361346

13371347
elif analysis == DetectionCategory.PACKAGE_UNKNOWN_FILE_REFERENCE_LOCAL.value:
13381348
if TRACE_ANALYSIS:
13391349
logger_debug(f'analysis {DetectionCategory.PACKAGE_UNKNOWN_FILE_REFERENCE_LOCAL.value}')
1340-
matches_for_expression = filter_license_references(license_matches)
1350+
matches_for_expression = filter_license_intros_and_references(license_matches)
13411351
detection_log.append(DetectionRule.PACKAGE_UNKNOWN_REFERENCE_TO_LOCAL_FILE.value)
13421352

13431353
elif analysis == DetectionCategory.PACKAGE_ADD_FROM_SIBLING_FILE.value:
13441354
if TRACE_ANALYSIS:
13451355
logger_debug(f'analysis {DetectionCategory.PACKAGE_ADD_FROM_SIBLING_FILE.value}')
1346-
matches_for_expression = filter_license_references(license_matches)
1356+
matches_for_expression = filter_license_intros_and_references(license_matches)
13471357
detection_log.append(DetectionRule.PACKAGE_ADD_FROM_SIBLING_FILE.value)
13481358

13491359
elif analysis == DetectionCategory.PACKAGE_ADD_FROM_FILE.value:
13501360
if TRACE_ANALYSIS:
13511361
logger_debug(f'analysis {DetectionCategory.PACKAGE_ADD_FROM_FILE.value}')
1352-
matches_for_expression = filter_license_references(license_matches)
1362+
matches_for_expression = filter_license_intros_and_references(license_matches)
13531363
detection_log.append(DetectionRule.PACKAGE_ADD_FROM_FILE.value)
13541364

13551365
elif analysis == DetectionCategory.UNKNOWN_MATCH.value:
@@ -1495,6 +1505,9 @@ def get_ambiguous_license_detections_by_type(unique_license_detections):
14951505

14961506
elif is_undetected_license_matches(license_matches=detection.matches):
14971507
ambi_license_detections[DetectionCategory.UNDETECTED_LICENSE.value] = detection
1508+
1509+
elif has_correct_license_clue_matches(license_matches=detection.matches):
1510+
ambi_license_detections[DetectionCategory.LICENSE_CLUES.value] = detection
14981511

14991512
elif "unknown" in detection.license_expression:
15001513
if has_unknown_matches(license_matches=detection.matches):

src/summarycode/todo.py

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -327,6 +327,11 @@ class ReviewComments(Enum):
327327
"rule text is matched, but some unknown extra words have been inserted in "
328328
"the text, which needs to be reviewed. "
329329
)
330+
LICENSE_CLUES = (
331+
"Text which might contain useful clues about license detections, but cannot "
332+
"be considered as a proper license detection. Additional review is needed "
333+
"to determine if this license clue is useful."
334+
)
330335
UNKNOWN_MATCH = (
331336
"The license detection is inconclusive, as the license matches have "
332337
"been matched to rules having unknown as their license key, and these "
@@ -380,6 +385,9 @@ def get_review_comments(detection_log):
380385
if LicenseDetectionCategory.EXTRA_WORDS.value in detection_log:
381386
review_comments[LicenseDetectionCategory.EXTRA_WORDS.value] = ReviewComments.EXTRA_WORDS.value
382387

388+
if LicenseDetectionCategory.LICENSE_CLUES.value in detection_log:
389+
review_comments[LicenseDetectionCategory.LICENSE_CLUES.value] = ReviewComments.LICENSE_CLUES.value
390+
383391
if LicenseDetectionCategory.UNKNOWN_MATCH.value in detection_log:
384392
review_comments[LicenseDetectionCategory.UNKNOWN_MATCH.value] = ReviewComments.UNKNOWN_MATCH.value
385393

0 commit comments

Comments (0)