|
1 | 1 | import json |
| 2 | +import os |
| 3 | +import tempfile |
2 | 4 | import unittest |
3 | 5 | from os.path import abspath, dirname |
4 | 6 |
|
@@ -95,6 +97,55 @@ def test_parse_multilabel_classification_csv(self): |
95 | 97 | self.assertEqual(img1["annotationfile"]["type"], "classification_multilabel") |
96 | 98 | self.assertEqual(set(img1["annotationfile"]["labels"]), {"Blackheads"}) |
97 | 99 |
|
def test_coco_with_subdir_file_name_should_match_annotations(self):
    """Check that a COCO ``file_name`` with a subdirectory still matches a root-level image.

    The fixture places the image file directly in the dataset root, while the
    COCO JSON refers to it as ``2/100002/<image_name>``. After parsing the
    folder, exactly one image entry keyed ``/<image_name>`` is expected, and
    its ``annotationfile`` must be populated.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        image_name = "metaclip_2_100002_02f2f7c6e15f09b401575ae6.jpeg"

        # The image lives at the dataset root; only the COCO reference below
        # carries the "2/100002/" subdirectory prefix.
        image_path = os.path.join(tmpdir, image_name)
        # Create an empty image file (the original test notes the content is
        # not used by the parser).
        with open(image_path, "wb"):
            pass

        # COCO file names use forward slashes regardless of the host OS.
        coco_file_name = "/".join(("2", "100002", image_name))

        # COCO annotation JSON at the dataset root, referencing the image
        # through the subdirectory-qualified file_name.
        coco = {
            "info": {},
            "licenses": [],
            "categories": [{"id": 1, "name": "thing"}],
            "images": [
                {
                    "id": 10000000,
                    "file_name": coco_file_name,
                    "width": 800,
                    "height": 533,
                }
            ],
            "annotations": [
                {
                    "id": 1,
                    "image_id": 10000000,
                    "category_id": 1,
                    "bbox": [10, 10, 100, 50],
                    "area": 5000,
                    "segmentation": [],
                    "iscrowd": 0,
                }
            ],
        }
        coco_path = os.path.join(tmpdir, "_annotations.coco.json")
        with open(coco_path, "w", encoding="utf-8") as f:
            json.dump(coco, f)

        parsed = folderparser.parsefolder(tmpdir)

        # Image entries store the file with a leading slash relative to root.
        expected_file_key = f"/{image_name}"
        img_entries = [
            entry for entry in parsed["images"] if entry["file"] == expected_file_key
        ]
        # assertEqual reports the actual count on failure, unlike assertTrue.
        self.assertEqual(len(img_entries), 1)
        img_entry = img_entries[0]

        # NOTE(review): the original test comment states this assertion
        # currently fails because of basename-only matching in the parser;
        # confirm against folderparser before relying on that explanation.
        self.assertIsNotNone(img_entry.get("annotationfile"))
98 | 149 |
|
99 | 150 | def _assertJsonMatchesFile(actual, filename): |
100 | 151 | with open(filename) as file: |
|
0 commit comments