Skip to content

Commit 44ab276

Browse files
Merge pull request #3247 from nexB/3245-lic-detection-stuck
Fix choking license detection post-processing #3245
2 parents 6358a4b + f2a08de commit 44ab276

File tree

1 file changed

+22
-18
lines changed

1 file changed

+22
-18
lines changed

src/licensedcode/detection.py

Lines changed: 22 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -14,9 +14,9 @@
1414
import uuid
1515
from enum import Enum
1616
from hashlib import sha1
17-
from collections import Counter
1817

1918
import attr
19+
from collections import defaultdict
2020
from license_expression import combine_expressions
2121
from license_expression import Licensing
2222

@@ -595,33 +595,25 @@ def get_unique_detections(cls, license_detections):
595595
Return all unique UniqueDetection from a ``license_detections`` list of
596596
LicenseDetection.
597597
"""
598-
identifiers = get_identifiers(license_detections)
599-
unique_detection_counts = dict(Counter(identifiers))
600-
598+
detections_by_id = get_detections_by_id(license_detections)
601599
unique_license_detections = []
602-
for detection_identifier in unique_detection_counts.keys():
603-
file_regions = (
600+
601+
for all_detections in detections_by_id.values():
602+
file_regions = [
604603
detection.file_region
605-
for detection in license_detections
606-
if detection_identifier == detection.identifier
607-
)
608-
all_detections = (
609-
detection
610-
for detection in license_detections
611-
if detection_identifier == detection.identifier
612-
)
604+
for detection in all_detections
605+
]
613606

614-
detection = next(all_detections)
607+
detection = next(iter(all_detections))
615608
detection_mapping = detection.to_dict()
616-
files = list(file_regions)
617609
unique_license_detections.append(
618610
cls(
619611
identifier=detection.identifier_with_expression,
620612
license_expression=detection_mapping["license_expression"],
621613
detection_log=detection_mapping["detection_log"],
622614
matches=detection_mapping["matches"],
623-
count=len(files),
624-
files=files,
615+
count=len(file_regions),
616+
files=file_regions,
625617
)
626618
)
627619

@@ -638,6 +630,18 @@ def dict_fields(attr, value):
638630
return attr.asdict(self, filter=dict_fields)
639631

640632

633+
def get_detections_by_id(license_detections):
634+
"""
635+
Get a dict(hashmap) where each item is: {detection.identifier: all_detections} where
636+
`all_detections` is all detections in `license_detections` whose detection.identifier
637+
is the same.
638+
"""
639+
detections_by_id = defaultdict(list)
640+
for detection in license_detections:
641+
detections_by_id[detection.identifier].append(detection)
642+
643+
return detections_by_id
644+
641645
def get_identifiers(license_detections):
642646
"""
643647
Return identifiers for all ``license detections``.

0 commit comments

Comments
 (0)