Skip to content

Commit bad8588

Browse files
committed
Be more flexible with filename matches
1 parent 7034b60 commit bad8588

File tree

2 files changed

+71
-2
lines changed

2 files changed

+71
-2
lines changed

roboflow/util/folderparser.py

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,17 @@ def _build_image_and_annotation_maps(annotationFiles):
140140
)
141141
if parsedType == "coco":
142142
for imageRef in parsed["images"]:
143-
imgRefMap[f"{filename}/{imageRef['file_name']}"] = imageRef
143+
# Normalize and index by multiple forms to improve matching robustness
144+
file_name = _patch_sep(imageRef["file_name"]).lstrip("/")
145+
basename = os.path.basename(file_name)
146+
stem = os.path.splitext(basename)[0]
147+
148+
# Index each image under its full relative path, its basename, and its stem; lookup order decides priority (colliding keys are overwritten by later images)
149+
imgRefMap.update({
150+
f"{filename}/{file_name}": imageRef,
151+
f"{filename}/{basename}": imageRef,
152+
f"{filename}/{stem}": imageRef,
153+
})
144154
for annotation in parsed["annotations"]:
145155
annotationMap[f"{dirname}/{annotation['image_id']}"].append(annotation)
146156
return imgRefMap, annotationMap
@@ -149,7 +159,15 @@ def _build_image_and_annotation_maps(annotationFiles):
149159
def _filterIndividualAnnotations(image, annotation, format, imgRefMap, annotationMap):
150160
parsed = annotation["parsed"]
151161
if format == "coco":
152-
imgReference = imgRefMap.get(f"{annotation['file']}/{image['name']}")
162+
rel_path = image["file"].lstrip("/")
163+
imgReference = (
164+
# Try matching by full relative path first
165+
imgRefMap.get(f"{annotation['file']}/{rel_path}")
166+
# Fallback: basename with extension
167+
or imgRefMap.get(f"{annotation['file']}/{image['name']}")
168+
# Fallback: stem (no extension)
169+
or imgRefMap.get(f"{annotation['file']}/{image['key']}")
170+
)
153171
if imgReference:
154172
# workaround to make Annotations.js correctly identify this as coco in the backend
155173
fake_annotation = {

tests/util/test_folderparser.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
import json
2+
import os
3+
import tempfile
24
import unittest
35
from os.path import abspath, dirname
46

@@ -95,6 +97,55 @@ def test_parse_multilabel_classification_csv(self):
9597
self.assertEqual(img1["annotationfile"]["type"], "classification_multilabel")
9698
self.assertEqual(set(img1["annotationfile"]["labels"]), {"Blackheads"})
9799

100+
def test_coco_with_subdir_file_name_should_match_annotations(self):
101+
# COCO file_name includes a subdirectory, but the actual image is at dataset root.
102+
with tempfile.TemporaryDirectory() as tmpdir:
103+
# Create nested image path: /2/100002/img.jpeg
104+
image_name = "metaclip_2_100002_02f2f7c6e15f09b401575ae6.jpeg"
105+
image_relpath = os.path.join("2", "100002", image_name)
106+
image_path = os.path.join(tmpdir, image_name)
107+
# Create an empty image file (content not used by parser)
108+
open(image_path, "wb").close()
109+
110+
# Create COCO annotation JSON at dataset root, referencing the image with subdir in file_name
111+
coco = {
112+
"info": {},
113+
"licenses": [],
114+
"categories": [{"id": 1, "name": "thing"}],
115+
"images": [
116+
{
117+
"id": 10000000,
118+
"file_name": image_relpath.replace(os.sep, "/"),
119+
"width": 800,
120+
"height": 533,
121+
}
122+
],
123+
"annotations": [
124+
{
125+
"id": 1,
126+
"image_id": 10000000,
127+
"category_id": 1,
128+
"bbox": [10, 10, 100, 50],
129+
"area": 5000,
130+
"segmentation": [],
131+
"iscrowd": 0,
132+
}
133+
],
134+
}
135+
coco_path = os.path.join(tmpdir, "_annotations.coco.json")
136+
with open(coco_path, "w") as f:
137+
json.dump(coco, f)
138+
139+
parsed = folderparser.parsefolder(tmpdir)
140+
# Image entries store file with a leading slash relative to root
141+
expected_file_key = f"/{image_name}"
142+
img_entries = [i for i in parsed["images"] if i["file"] == expected_file_key]
143+
self.assertTrue(len(img_entries) == 1)
144+
img_entry = img_entries[0]
145+
146+
# Expect annotationfile to be populated, but this currently fails due to basename-only matching
147+
self.assertIsNotNone(img_entry.get("annotationfile"))
148+
98149

99150
def _assertJsonMatchesFile(actual, filename):
100151
with open(filename) as file:

0 commit comments

Comments
 (0)