@@ -99,43 +99,45 @@ def _map_annotations_to_images_1to1(images, annotations):
9999 return countmapped > 0
100100
101101
def _map_annotations_to_images_1tomany(images, annotationFiles):
    """Attach annotation data to every image that shares a directory with an annotation file.

    For each image, the annotation files registered under the image's
    ``dirname`` are looked up. When at least one exists, the first one is
    passed to ``_filterIndividualAnnotations`` and the result is stored on the
    image under the ``"annotationfile"`` key. Images whose directory has no
    annotation file are left untouched.

    Args:
        images: Iterable of image dicts; each must provide ``"dirname"``.
            The dicts are mutated in place.
        annotationFiles: Annotation-file dicts. Each is expected to carry at
            least ``"dirname"`` and ``"parsedType"`` (other keys are consumed
            by the helpers this function calls).

    Returns:
        None. Results are written onto the image dicts.
    """
    # presumably groups the annotation files by their "dirname" value —
    # confirm against _list_map.
    annotationsByDirname = _list_map(annotationFiles, "dirname")
    # The lookup tables are built once, from *all* annotation files, so the
    # per-image work below is a couple of dict lookups instead of a rescan.
    imgRefMap, annotationMap = _build_image_and_annotation_maps(annotationFiles)

    for image in tqdm(images):
        dirname = image["dirname"]
        annotationsInSameDir = annotationsByDirname.get(dirname, [])
        if not annotationsInSameDir:
            continue
        if len(annotationsInSameDir) > 1:
            print(f"warning: found multiple annotation files on dir {dirname} ")
        # Only the first annotation file of a directory is used; any others
        # are reported by the warning above and otherwise ignored here.
        annotationFile = annotationsInSameDir[0]
        parsedType = annotationFile["parsedType"]
        image["annotationfile"] = _filterIndividualAnnotations(
            image, annotationFile, parsedType, imgRefMap, annotationMap
        )
133117
134118
135- def _filterIndividualAnnotations (image , annotation , format , img_dict , anno_dict ):
def _build_image_and_annotation_maps(annotationFiles):
    """Build lookup tables for the COCO annotation files in ``annotationFiles``.

    Entries whose ``"parsedType"`` is not ``"coco"`` are skipped without
    reading any of their other keys. A COCO file that has no ``"images"`` or
    no ``"annotations"`` section contributes nothing for that section instead
    of raising ``KeyError``.

    Args:
        annotationFiles: Iterable of annotation-file dicts. Every entry needs
            ``"parsedType"``; COCO entries additionally need ``"file"``,
            ``"dirname"`` and ``"parsed"`` (the decoded COCO document).

    Returns:
        A ``(imgRefMap, annotationMap)`` tuple:

        * ``imgRefMap`` maps a key built from the annotation file's ``"file"``
          value and an image's ``file_name`` to that COCO image entry.
        * ``annotationMap`` is a ``defaultdict(list)`` mapping a key built from
          the annotation file's ``"dirname"`` and an annotation's ``image_id``
          to the list of annotations for that image.
    """
    imgRefMap = {}
    annotationMap = defaultdict(list)
    for annFile in annotationFiles:
        if annFile["parsedType"] != "coco":
            continue
        filename = annFile["file"]
        dirname = annFile["dirname"]
        parsed = annFile["parsed"]
        # NOTE(review): the literal space before "/" and the trailing space are
        # part of both keys as written; whoever looks these maps up must build
        # the identical string. They look unintentional — confirm and
        # normalise builder and lookup sites together.
        for imageRef in parsed.get("images") or []:
            imgRefMap[f"{filename} /{imageRef['file_name']} "] = imageRef
        # image_id values are only unique within one COCO file, so the
        # directory name is folded into the key.
        for annotation in parsed.get("annotations") or []:
            annotationMap[f"{dirname} /{annotation['image_id']} "].append(annotation)
    return imgRefMap, annotationMap
135+
136+
137+ def _filterIndividualAnnotations (image , annotation , format , imgRefMap , annotationMap ):
136138 parsed = annotation ["parsed" ]
137139 if format == "coco" :
138- imgReference = img_dict .get (image [" name" ] )
140+ imgReference = imgRefMap .get (f" { annotation [ 'file' ] } / { image [' name' ] } " )
139141 if imgReference :
140142 # workaround to make Annotations.js correctly identify this as coco in the backend
141143 fake_annotation = {
@@ -147,8 +149,7 @@ def _filterIndividualAnnotations(image, annotation, format, img_dict, anno_dict)
147149 "iscrowd" : 0 ,
148150 }
149151 _annotation = {"name" : "annotation.coco.json" }
150- key = f"{ image ['dirname' ]} /{ imgReference ['id' ]} "
151- annotations_for_image = anno_dict .get (key , [])
152+ annotations_for_image = annotationMap .get (f"{ image ['dirname' ]} /{ imgReference ['id' ]} " , [])
152153 _annotation ["rawText" ] = json .dumps (
153154 {
154155 "info" : parsed ["info" ],
0 commit comments