1414import  uuid 
1515from  enum  import  Enum 
1616from  hashlib  import  sha1 
17- from  collections  import  Counter 
1817
1918import  attr 
19+ from  collections  import  defaultdict 
2020from  license_expression  import  combine_expressions 
2121from  license_expression  import  Licensing 
2222
@@ -595,33 +595,25 @@ def get_unique_detections(cls, license_detections):
595595        Return all unique UniqueDetection from a ``license_detections`` list of 
596596        LicenseDetection. 
597597        """ 
598-         identifiers  =  get_identifiers (license_detections )
599-         unique_detection_counts  =  dict (Counter (identifiers ))
600- 
598+         detections_by_id  =  get_detections_by_id (license_detections )
601599        unique_license_detections  =  []
602-         for  detection_identifier  in  unique_detection_counts .keys ():
603-             file_regions  =  (
600+ 
601+         for  all_detections  in  detections_by_id .values ():
602+             file_regions  =  [
604603                detection .file_region 
605-                 for  detection  in  license_detections 
606-                 if  detection_identifier  ==  detection .identifier 
607-             )
608-             all_detections  =  (
609-                 detection 
610-                 for  detection  in  license_detections 
611-                 if  detection_identifier  ==  detection .identifier 
612-             )
604+                 for  detection  in  all_detections 
605+             ]
613606
614-             detection  =  next (all_detections )
607+             detection  =  next (iter ( all_detections ) )
615608            detection_mapping  =  detection .to_dict ()
616-             files  =  list (file_regions )
617609            unique_license_detections .append (
618610                cls (
619611                    identifier = detection .identifier_with_expression ,
620612                    license_expression = detection_mapping ["license_expression" ],
621613                    detection_log = detection_mapping ["detection_log" ],
622614                    matches = detection_mapping ["matches" ],
623-                     count = len (files ),
624-                     files = files ,
615+                     count = len (file_regions ),
616+                     files = file_regions ,
625617                )
626618            )
627619
@@ -638,6 +630,18 @@ def dict_fields(attr, value):
638630        return  attr .asdict (self , filter = dict_fields )
639631
640632
def get_detections_by_id(license_detections):
    """
    Return a mapping of {identifier: list of detections} that groups the
    ``license_detections`` iterable of detections by their ``identifier``
    attribute.

    Detections keep their input order within each list, and identifiers are
    keyed in order of first appearance. A plain dict is returned so that
    looking up an unknown identifier raises a KeyError rather than silently
    inserting an empty list in the mapping.
    """
    detections_by_id = defaultdict(list)
    for detection in license_detections:
        detections_by_id[detection.identifier].append(detection)

    # Drop the default factory before handing the mapping to callers.
    return dict(detections_by_id)
644+ 
641645def  get_identifiers (license_detections ):
642646    """ 
643647    Return identifiers for all ``license detections``. 
0 commit comments