1414import uuid
1515from enum import Enum
1616from hashlib import sha1
17- from collections import Counter
1817
1918import attr
19+ from collections import defaultdict
2020from license_expression import combine_expressions
2121from license_expression import Licensing
2222
@@ -595,33 +595,25 @@ def get_unique_detections(cls, license_detections):
595595 Return all unique UniqueDetection from a ``license_detections`` list of
596596 LicenseDetection.
597597 """
598- identifiers = get_identifiers (license_detections )
599- unique_detection_counts = dict (Counter (identifiers ))
600-
598+ detections_by_id = get_detections_by_id (license_detections )
601599 unique_license_detections = []
602- for detection_identifier in unique_detection_counts .keys ():
603- file_regions = (
600+
601+ for all_detections in detections_by_id .values ():
602+ file_regions = [
604603 detection .file_region
605- for detection in license_detections
606- if detection_identifier == detection .identifier
607- )
608- all_detections = (
609- detection
610- for detection in license_detections
611- if detection_identifier == detection .identifier
612- )
604+ for detection in all_detections
605+ ]
613606
614- detection = next (all_detections )
607+ detection = next (iter ( all_detections ) )
615608 detection_mapping = detection .to_dict ()
616- files = list (file_regions )
617609 unique_license_detections .append (
618610 cls (
619611 identifier = detection .identifier_with_expression ,
620612 license_expression = detection_mapping ["license_expression" ],
621613 detection_log = detection_mapping ["detection_log" ],
622614 matches = detection_mapping ["matches" ],
623- count = len (files ),
624- files = files ,
615+ count = len (file_regions ),
616+ files = file_regions ,
625617 )
626618 )
627619
@@ -638,6 +630,18 @@ def dict_fields(attr, value):
638630 return attr .asdict (self , filter = dict_fields )
639631
640632
def get_detections_by_id(license_detections):
    """
    Return a mapping of {identifier: detections} built from a
    ``license_detections`` iterable of detection objects, where ``detections``
    is the list of all detections sharing the same ``detection.identifier``.

    Identifiers are keyed in order of first appearance and each list keeps
    its detections in input order. The returned mapping is a plain dict:
    looking up an unknown identifier raises a KeyError rather than silently
    creating an empty entry.
    """
    detections_by_id = {}
    for detection in license_detections:
        # setdefault gives the grouping behavior without handing callers a
        # defaultdict that would grow on every missed lookup.
        detections_by_id.setdefault(detection.identifier, []).append(detection)

    return detections_by_id
644+
641645def get_identifiers (license_detections ):
642646 """
643647 Return identifiers for all ``license detections``.
0 commit comments