Skip to content

Commit 3b1aaab

Browse files
authored
Merge pull request #320 from JdeRobot/issue-315
Add support for YOLO-styled datasets and torchscript models
2 parents ec20ff3 + 0d2b2e9 commit 3b1aaab

File tree

15 files changed

+1679
-135
lines changed

15 files changed

+1679
-135
lines changed

README.md

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,8 @@ Now, we're excited to introduce ***DetectionMetrics v2***! While retaining the f
4646
<tr>
4747
<td>Object detection</td>
4848
<td>Image</td>
49-
<td>COCO, custom formats</td>
50-
<td>PyTorch</td>
49+
<td>COCO, YOLO</td>
50+
<td>PyTorch (tested with torchvision and torchscript-exported YOLO models)</td>
5151
</tr>
5252
</tbody>
5353
</table>
@@ -118,6 +118,8 @@ For detailed GUI documentation, see our [GUI guide](https://jderobot.github.io/D
118118

119119
🧑‍🏫️ [Image Detection Tutorial](https://github.com/JdeRobot/DetectionMetrics/blob/master/examples/tutorial_image_detection.ipynb)
120120

121+
🧑‍🏫️ [Image Detection Tutorial (YOLO)](https://github.com/JdeRobot/DetectionMetrics/blob/master/examples/tutorial_image_detection_yolo.ipynb)
122+
121123
You can check the `examples` directory for further inspiration. If you are using *poetry*, you can run the scripts provided either by activating the created environment using `poetry shell` or directly running `poetry run python examples/<some_python_script.py>`.
122124

123125
## Command-line interface

app.py

Lines changed: 38 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ def browse_folder():
7979

8080
# Initialize commonly used session state keys
8181
st.session_state.setdefault("dataset_path", "")
82-
st.session_state.setdefault("dataset_type_selectbox", "Coco")
82+
st.session_state.setdefault("dataset_type_selectbox", "COCO")
8383
st.session_state.setdefault("split_selectbox", "val")
8484
st.session_state.setdefault("config_option", "Manual Configuration")
8585
st.session_state.setdefault("confidence_threshold", 0.5)
@@ -97,23 +97,23 @@ def browse_folder():
9797
# First row: Type and Split
9898
col1, col2 = st.columns(2)
9999
with col1:
100-
st.selectbox(
100+
dataset_type_selectbox = st.selectbox(
101101
"Type",
102-
["Coco", "Custom"],
102+
["COCO", "YOLO"],
103103
key="dataset_type_selectbox",
104104
)
105105
with col2:
106106
st.selectbox(
107107
"Split",
108-
["train", "val"],
108+
["train", "val", "test"],
109109
key="split_selectbox",
110110
)
111111

112112
# Second row: Path and Browse button
113113
col1, col2 = st.columns([3, 1])
114114
with col1:
115115
dataset_path_input = st.text_input(
116-
"Dataset Folder Path",
116+
"Dataset Folder",
117117
value=st.session_state.get("dataset_path", ""),
118118
key="dataset_path_input",
119119
)
@@ -129,15 +129,32 @@ def browse_folder():
129129
elif folder is not None:
130130
st.warning("Selected path is not a valid folder.")
131131
else:
132-
st.warning("Could not open folder browser. Please enter the path manually")
132+
st.warning(
133+
"Could not open folder browser. Please enter the path manually"
134+
)
133135

134136
if dataset_path_input != st.session_state.get("dataset_path", ""):
135137
st.session_state["dataset_path"] = dataset_path_input
138+
if dataset_type_selectbox != st.session_state.get("dataset_type", ""):
139+
st.session_state["dataset_type"] = dataset_type_selectbox
140+
141+
# Additional input for YOLO config file
142+
if dataset_type_selectbox == "YOLO":
143+
dataset_config_file_uploader = st.file_uploader(
144+
"Dataset Configuration (.yaml)",
145+
type=["yaml"],
146+
key="dataset_config_file",
147+
help="Upload a YAML dataset configuration file.",
148+
)
149+
if dataset_config_file_uploader != st.session_state.get(
150+
"dataset_config_file", None
151+
):
152+
st.session_state["dataset_config_file"] = dataset_config_file_uploader
136153

137154
with st.expander("Model Inputs", expanded=False):
138155
st.file_uploader(
139-
"Model File (.pt, .onnx, .h5, .pb, .pth)",
140-
type=["pt", "onnx", "h5", "pb", "pth"],
156+
"Model File (.pt, .onnx, .h5, .pb, .pth, .torchscript)",
157+
type=["pt", "onnx", "h5", "pb", "pth", "torchscript"],
141158
key="model_file",
142159
help="Upload your trained model file.",
143160
)
@@ -199,6 +216,17 @@ def browse_folder():
199216
index=0 if st.session_state.get("device", "cpu") == "cpu" else 1,
200217
key="device",
201218
)
219+
st.selectbox(
220+
"Model Format",
221+
["torchvision", "YOLO"],
222+
index=(
223+
0
224+
if st.session_state.get("model_format", "torchvision")
225+
== "torchvision"
226+
else 1
227+
),
228+
key="model_format",
229+
)
202230
st.number_input(
203231
"Batch Size",
204232
min_value=1,
@@ -264,13 +292,15 @@ def browse_folder():
264292
device = st.session_state.get("device", "cpu")
265293
batch_size = int(st.session_state.get("batch_size", 1))
266294
evaluation_step = int(st.session_state.get("evaluation_step", 5))
295+
model_format = st.session_state.get("model_format", "torchvision")
267296
config_data = {
268297
"confidence_threshold": confidence_threshold,
269298
"nms_threshold": nms_threshold,
270299
"max_detections_per_image": max_detections,
271300
"device": device,
272301
"batch_size": batch_size,
273302
"evaluation_step": evaluation_step,
303+
"model_format": model_format.lower(),
274304
}
275305
with tempfile.NamedTemporaryFile(
276306
delete=False, suffix=".json", mode="w"

detectionmetrics/datasets/coco.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -92,14 +92,14 @@ def __init__(self, annotation_file: str, image_dir: str, split: str = "train"):
9292
def read_annotation(
9393
self, fname: str
9494
) -> Tuple[List[List[float]], List[int], List[int]]:
95-
"""Return bounding boxes, labels, and category_ids for a given image ID.
95+
"""Return bounding boxes and category indices for a given image ID.
9696
9797
This method uses COCO's efficient indexing to load annotations on-demand.
9898
The COCO object maintains an internal index that allows for very fast
9999
annotation retrieval without needing a separate cache.
100100
101101
:param fname: str (image_id in string form)
102-
:return: Tuple of (boxes, labels, category_ids)
102+
:return: Tuple of (boxes, category_indices)
103103
"""
104104
# Extract image ID (fname might be a path or ID string)
105105
try:
@@ -112,11 +112,10 @@ def read_annotation(
112112
ann_ids = self.coco.getAnnIds(imgIds=image_id)
113113
anns = self.coco.loadAnns(ann_ids)
114114

115-
boxes, labels, category_ids = [], [], []
115+
boxes, category_indices = [], []
116116
for ann in anns:
117117
x, y, w, h = ann["bbox"]
118118
boxes.append([x, y, x + w, y + h])
119-
labels.append(ann["category_id"])
120-
category_ids.append(ann["category_id"])
119+
category_indices.append(ann["category_id"])
121120

122-
return boxes, labels, category_ids
121+
return boxes, category_indices

detectionmetrics/datasets/yolo.py

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
from glob import glob
2+
import os
3+
from typing import Tuple, List, Optional
4+
5+
import pandas as pd
6+
from PIL import Image
7+
8+
from detectionmetrics.datasets.detection import ImageDetectionDataset
9+
from detectionmetrics.utils import io as uio
10+
11+
12+
def build_dataset(
    dataset_fname: str, dataset_dir: Optional[str] = None, im_ext: str = "jpg"
) -> Tuple[pd.DataFrame, dict, str]:
    """Build dataset and ontology dictionaries from YOLO dataset structure

    Only label files that have a matching image (same base name, ``im_ext``
    extension) in the corresponding images directory are included.

    :param dataset_fname: Path to the YAML dataset configuration file
    :type dataset_fname: str
    :param dataset_dir: Path to the directory containing images and annotations.
        If not provided, the ``path`` entry of the dataset file is used
    :type dataset_dir: Optional[str]
    :param im_ext: Image file extension without the leading dot (default is "jpg")
    :type im_ext: str
    :return: Dataset DataFrame (columns ``image``, ``annotation``, ``split``, with
        paths relative to the dataset directory), ontology dictionary and the
        resolved dataset directory
    :rtype: Tuple[pd.DataFrame, dict, str]
    """
    # Read dataset configuration from YAML file
    assert os.path.isfile(dataset_fname), f"Dataset file not found: {dataset_fname}"
    dataset_info = uio.read_yaml(dataset_fname)

    # Fall back to the root directory declared in the YAML file
    if dataset_dir is None:
        dataset_dir = dataset_info["path"]
    assert os.path.isdir(dataset_dir), f"Dataset directory not found: {dataset_dir}"

    # Build ontology from dataset configuration. `names` is usually an
    # {index: name} mapping, but a plain list of names is also accepted, in
    # which case the list position is used as the class index.
    names = dataset_info["names"]
    named_classes = names.items() if isinstance(names, dict) else enumerate(names)
    ontology = {
        name: {
            "idx": idx,
            "rgb": [0, 0, 0],  # Placeholder; YAML doesn't define RGB colors
        }
        for idx, name in named_classes
    }

    # Build dataset DataFrame
    rows = []
    for split in ["train", "val", "test"]:
        # A split may be missing or declared with an empty value (e.g. `test:`)
        images_subdir = dataset_info.get(split)
        if not images_subdir:
            continue

        labels_subdir = images_subdir.replace("images", "labels")
        images_dir = os.path.join(dataset_dir, images_subdir)
        labels_dir = os.path.join(dataset_dir, labels_subdir)

        # Sorted so that the row order does not depend on the file system
        for label_fname in sorted(glob(os.path.join(labels_dir, "*.txt"))):
            label_basename = os.path.basename(label_fname)
            # Swap only the extension; ".txt" may also appear inside the stem
            image_basename = f"{os.path.splitext(label_basename)[0]}.{im_ext}"
            if not os.path.isfile(os.path.join(images_dir, image_basename)):
                continue

            # Record the same sub-directories that were just checked on disk
            # rather than assuming an "images/<split>" layout
            rows.append(
                {
                    "image": os.path.join(images_subdir, image_basename),
                    "annotation": os.path.join(labels_subdir, label_basename),
                    "split": split,
                }
            )

    # Explicit columns keep the expected schema even when no sample was found
    dataset = pd.DataFrame(rows, columns=["image", "annotation", "split"])
    dataset.attrs = {"ontology": ontology}

    return dataset, ontology, dataset_dir
71+
72+
73+
class YOLODataset(ImageDetectionDataset):
    """
    Specific class for YOLO-styled object detection datasets.

    :param dataset_fname: Path to the YAML dataset configuration file
    :type dataset_fname: str
    :param dataset_dir: Path to the directory containing images and annotations.
        If not provided, it will be inferred from the dataset file
    :type dataset_dir: Optional[str]
    :param im_ext: Image file extension without the leading dot (default is "jpg")
    :type im_ext: str
    """

    def __init__(
        self,
        dataset_fname: str,
        dataset_dir: Optional[str] = None,
        im_ext: str = "jpg",
    ):
        # Parse the YAML configuration and index the samples found on disk
        dataset, ontology, dataset_dir = build_dataset(
            dataset_fname, dataset_dir, im_ext
        )

        self.im_ext = im_ext
        super().__init__(dataset=dataset, dataset_dir=dataset_dir, ontology=ontology)

    def read_annotation(
        self, fname: str, image_size: Optional[Tuple[int, int]] = None
    ) -> Tuple[List[List[float]], List[int]]:
        """Return bounding boxes and category indices for a given annotation file.

        Each non-empty row is expected to hold
        ``<category_idx> <x_center> <y_center> <width> <height>`` with coordinates
        relative to the image size. Boxes are returned in absolute
        ``[x_min, y_min, x_max, y_max]`` format.

        :param fname: Annotation path
        :type fname: str
        :param image_size: Corresponding image size in (w, h) format for converting
            relative bbox size to absolute. If not provided, the image is located by
            swapping the last "labels" in the annotation directory for "images" and
            the file extension for ``im_ext``, and its size is read from disk
        :type image_size: Optional[Tuple[int, int]]
        :return: Tuple of (boxes, category_indices)
        :rtype: Tuple[List[List[float]], List[int]]
        :raises ValueError: If a non-empty row does not contain exactly five values
        """
        # NOTE(review): assumes read_txt returns an iterable of text lines
        label = uio.read_txt(fname)

        if image_size is None:
            labels_dir, label_basename = os.path.split(fname)
            # Only touch the last "labels" of the directory part so that parent
            # directories or file names containing that word are left intact
            images_dir = "images".join(labels_dir.rsplit("labels", 1))
            image_basename = f"{os.path.splitext(label_basename)[0]}.{self.im_ext}"
            # Context manager closes the file handle once the size is known
            with Image.open(os.path.join(images_dir, image_basename)) as image:
                image_size = image.size

        boxes = []
        category_indices = []

        im_w, im_h = image_size
        for row in label:
            # Tolerate blank lines (e.g. a trailing newline at the end of the file)
            if not row.strip():
                continue

            category_idx, xc, yc, w, h = map(float, row.split())
            category_indices.append(int(category_idx))

            # Relative (center, size) -> absolute (center, size)
            abs_xc = xc * im_w
            abs_yc = yc * im_h
            abs_w = w * im_w
            abs_h = h * im_h

            # Absolute (center, size) -> absolute corner coordinates
            boxes.append(
                [
                    abs_xc - abs_w / 2,
                    abs_yc - abs_h / 2,
                    abs_xc + abs_w / 2,
                    abs_yc + abs_h / 2,
                ]
            )

        return boxes, category_indices

0 commit comments

Comments
 (0)