update converting unique coco labels (#27)

Laughing-q · web-flow · commit e30667d99c5c · 2022-08-30T17:03:31.000+02:00
diff --git a/general_json2yolo.py b/general_json2yolo.py
@@ -4,6 +4,7 @@
 import cv2
 import pandas as pd
 from PIL import Image
+from collections import defaultdict
 
 from utils import *
 
@@ -262,36 +263,49 @@ def convert_coco_json(json_dir='../coco/annotations/', use_segments=False, cls91
 
         # Create image dict
         images = {'%g' % x['id']: x for x in data['images']}
+        # Create image-annotations dict
+        imgToAnns = defaultdict(list)
+        for ann in data['annotations']:
+            imgToAnns[ann['image_id']].append(ann)
 
         # Write labels file
-        for x in tqdm(data['annotations'], desc=f'Annotations {json_file}'):
-            if x['iscrowd']:
-                continue
-
-            img = images['%g' % x['image_id']]
+        for img_id, anns in tqdm(imgToAnns.items(), desc=f'Annotations {json_file}'):
+            img = images['%g' % img_id]
             h, w, f = img['height'], img['width'], img['file_name']
 
-            # The COCO box format is [top left x, top left y, width, height]
-            box = np.array(x['bbox'], dtype=np.float64)
-            box[:2] += box[2:] / 2  # xy top-left corner to center
-            box[[0, 2]] /= w  # normalize x
-            box[[1, 3]] /= h  # normalize y
-
-            # Segments
-            if use_segments:
-                if len(x['segmentation']) > 1:
-                    s = merge_multi_segment(x['segmentation'])
-                    s = (np.concatenate(s, axis=0) / np.array([w, h])).reshape(-1).tolist()
-
-                else:
-                    segments = [j for i in x['segmentation'] for j in i]  # all segments concatenated
-                    s = (np.array(segments).reshape(-1, 2) / np.array([w, h])).reshape(-1).tolist()
+            bboxes = []
+            segments = []
+            for ann in anns:
+                if ann['iscrowd']:
+                    continue
+                # The COCO box format is [top left x, top left y, width, height]
+                box = np.array(ann['bbox'], dtype=np.float64)
+                box[:2] += box[2:] / 2  # xy top-left corner to center
+                box[[0, 2]] /= w  # normalize x
+                box[[1, 3]] /= h  # normalize y
+                if box[2] <= 0 or box[3] <= 0:  # skip if w <= 0 or h <= 0
+                    continue
+
+                cls = coco80[ann['category_id'] - 1] if cls91to80 else ann['category_id'] - 1  # class
+                box = [cls] + box.tolist()
+                if box not in bboxes:
+                    bboxes.append(box)
+                # Segments
+                if use_segments:
+                    if len(ann['segmentation']) > 1:
+                        s = merge_multi_segment(ann['segmentation'])
+                        s = (np.concatenate(s, axis=0) / np.array([w, h])).reshape(-1).tolist()
+                    else:
+                        s = [j for i in ann['segmentation'] for j in i]  # all segments concatenated
+                        s = (np.array(s).reshape(-1, 2) / np.array([w, h])).reshape(-1).tolist()
+                    s = [cls] + s
+                    if s not in segments:
+                        segments.append(s)
 
             # Write
-            if box[2] > 0 and box[3] > 0:  # if w > 0 and h > 0
-                cls = coco80[x['category_id'] - 1] if cls91to80 else x['category_id'] - 1  # class
-                line = cls, *(s if use_segments else box)  # cls, box or segments
-                with open((fn / f).with_suffix('.txt'), 'a') as file:
+            with open((fn / f).with_suffix('.txt'), 'a') as file:
+                for i in range(len(bboxes)):
+                    line = *(segments[i] if use_segments else bboxes[i]),  # cls, box or segments
                     file.write(('%g ' * len(line)).rstrip() % line + '\n')