Skip to content

Commit b76329f

Browse files
authored
Merge pull request #4 from goderent/main
add script to translate yolov5 yaml config file to coco
2 parents dfd1220 + 4446715 commit b76329f

File tree

3 files changed

+264
-0
lines changed

3 files changed

+264
-0
lines changed

README.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,22 @@
4848
└── 000000000001.jpg
4949
```
5050
51+
#### YOLOV5 yaml 数据描述文件 → COCO
52+
53+
- YOLOV5 yaml 数据文件需要包含:
54+
```text
55+
YOLOV5 yaml
56+
├── path(str, the root path)
57+
├── train(Train sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..])
58+
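├── val(val sets, same format as train)
├── nc(int, number of classes)
├── names(list of class names, optional; defaults to class0, class1, ...)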
59+
60+
```
61+
62+
- 转换
63+
```shell
64+
python yolov5_cfgfile_2_coco.py --cfg_file dataset/sample.yaml
65+
```
66+
5167
#### 可视化COCO格式标注格式
5268
```shell
5369
python coco_visual.py --vis_num 1 \

dataset/sample.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
2+
path: /the/root/path/to/data # dataset root dir
3+
train: [data1/images/train/, data2/images/train/]
4+
val: [data1/images/val/, data2/images/val/]
5+
6+
# Classes
7+
nc: 3 # number of classes
8+
names: ['class1', 'class2', 'class3'] # class names

yolov5_cfgfile_2_coco.py

Lines changed: 240 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,240 @@
1+
# !/usr/bin/env python
2+
# -*- encoding: utf-8 -*-
3+
4+
import argparse
5+
import json
6+
from webbrowser import BackgroundBrowser
7+
import yaml
8+
import shutil
9+
import glob
10+
import os
11+
from pathlib import Path
12+
13+
import cv2
14+
from tqdm import tqdm
15+
16+
17+
def read_txt(txt_path):
    """Return the lines of a UTF-8 text file with trailing newlines removed.

    Args:
        txt_path: Path of the text file to read (anything ``str()`` accepts).

    Returns:
        A list with one string per line of the file; blank lines are kept
        as empty strings.
    """
    with open(str(txt_path), 'r', encoding='utf-8') as handle:
        lines = [line.rstrip('\n') for line in handle]
    return lines
21+
22+
23+
def mkdir(dir_path):
    """Create ``dir_path`` and any missing parents; do nothing if it exists.

    Args:
        dir_path: Directory to create (str or path-like).
    """
    target = Path(dir_path)
    target.mkdir(exist_ok=True, parents=True)
25+
26+
27+
def verify_exists(file_path):
    """Ensure that ``file_path`` exists on disk.

    Args:
        file_path: Path to check (str or path-like).

    Raises:
        FileNotFoundError: If nothing exists at ``file_path``.
    """
    candidate = Path(file_path)
    if candidate.exists():
        return
    raise FileNotFoundError(f'The {candidate} is not exists!!!')
31+
32+
class YOLOV5CFG2COCO(object):
    """Convert a dataset described by a YOLOv5 yaml config file to COCO layout.

    The yaml file supplies an optional ``path`` root, the ``train`` / ``val``
    / ``test`` image sources (a directory, a list file, or a list of either),
    the class count ``nc`` and, optionally, the class ``names``.

    The converted dataset is written next to the config file into
    ``<cfg stem>_COCO_format/`` with the sub-directories ``train2017``,
    ``val2017`` and ``annotations``.
    """

    def __init__(self, cfg_file):
        """Parse ``cfg_file`` and prepare the output directories.

        Args:
            cfg_file: Path to the YOLOv5 dataset yaml file.

        Raises:
            KeyError: If the config has no ``nc`` entry.
            AssertionError: If the number of names differs from ``nc``.
        """
        ROOT = Path(cfg_file).resolve().parent
        with open(cfg_file, 'r', encoding="UTF-8") as f:
            data_cfg = yaml.safe_load(f)

        # Optional 'path' defaults to '.'; relative roots are resolved
        # against the directory that holds the config file.
        path = Path(data_cfg.get('path') or '')
        if not path.is_absolute():
            path = (ROOT / path).resolve()

        # Prepend the dataset root to every configured split.
        for k in 'train', 'val', 'test':
            if data_cfg.get(k):
                if isinstance(data_cfg[k], str):
                    data_cfg[k] = str(path / data_cfg[k])
                else:
                    data_cfg[k] = [str(path / x) for x in data_cfg[k]]

        # Assign generic class names when the config does not provide any.
        if 'names' not in data_cfg:
            data_cfg['names'] = [f'class{i}' for i in range(data_cfg['nc'])]

        self.train_path, self.val_path, self.test_path = (
            data_cfg.get(x) for x in ('train', 'val', 'test'))
        nc = data_cfg['nc']
        self.names = data_cfg['names']
        assert len(self.names) == nc, f'{len(self.names)} names found for nc={nc} dataset in {cfg_file}'  # check

        # Build the COCO-format directory layout.
        self.dst = ROOT / f"{Path(cfg_file).stem}_COCO_format"
        self.coco_train = "train2017"
        self.coco_val = "val2017"
        self.coco_annotation = "annotations"
        self.coco_train_json = self.dst / self.coco_annotation / \
            f'instances_{self.coco_train}.json'
        self.coco_val_json = self.dst / self.coco_annotation / \
            f'instances_{self.coco_val}.json'

        mkdir(self.dst)
        mkdir(self.dst / self.coco_train)
        mkdir(self.dst / self.coco_val)
        mkdir(self.dst / self.coco_annotation)

        # Build the skeleton of the json content.
        self.type = 'instances'
        self.categories = []
        self._get_category()
        # Annotation ids keep increasing across the train and val splits.
        self.annotation_id = 1

        self.info = {
            'year': 2021,
            'version': '1.0',
            'description': 'For object detection',
            'date_created': '2021',
        }

        self.licenses = [{
            'id': 1,
            'name': 'Apache License v2.0',
            'url': 'https://github.com/RapidAI/YOLO2COCO/LICENSE',
        }]

    def _get_category(self):
        """Fill ``self.categories`` with one entry per class name.

        Category ids start at 1, so YOLO class index ``i`` maps to COCO
        category id ``i + 1``.
        """
        for i, category in enumerate(self.names, 1):
            self.categories.append({
                'supercategory': category,
                'id': i,
                'name': category,
            })

    def generate(self):
        """Convert the train and val splits and print the output directory."""
        self.train_files = self.getfiles(self.train_path)
        self.valid_files = self.getfiles(self.val_path)

        train_dest_dir = Path(self.dst) / self.coco_train
        self.gen_dataset(self.train_files, train_dest_dir,
                         self.coco_train_json, mode='train')

        val_dest_dir = Path(self.dst) / self.coco_val
        self.gen_dataset(self.valid_files, val_dest_dir,
                         self.coco_val_json, mode='val')

        print(f"The output directory is: {str(self.dst)}")

    def getfiles(self, path):
        """Collect the image files referenced by one split entry.

        Args:
            path: A directory, a text file listing one image path per line,
                or a list mixing both.

        Returns:
            Sorted list of image path strings whose extension is one of the
            supported image formats.

        Raises:
            FileNotFoundError: If an entry is neither a directory nor a file.
        """
        IMG_FORMATS = 'bmp', 'dng', 'jpeg', 'jpg', 'mpo', 'png', 'tif', 'tiff', 'webp'  # include image suffixes
        found = []
        for p in path if isinstance(path, list) else [path]:
            p = Path(p)  # os-agnostic
            if p.is_dir():  # dir: search recursively for anything with a suffix
                found += glob.glob(str(p / '**' / '*.*'), recursive=True)
            elif p.is_file():  # file: one image path per line
                with open(p) as t:
                    listed = t.read().strip().splitlines()
                parent = str(p.parent) + os.sep
                # Entries starting with './' are relative to the list file.
                found += [x.replace('./', parent) if x.startswith('./') else x for x in listed]
            else:
                # FileNotFoundError is an Exception subclass, so callers that
                # caught the former bare Exception keep working.
                raise FileNotFoundError(f'{p} does not exist')
        im_files = sorted(x.replace('/', os.sep) for x in found if x.split('.')[-1].lower() in IMG_FORMATS)
        return im_files

    def gen_dataset(self, img_paths, target_img_path, target_json, mode):
        """Copy the images of one split and write its COCO annotation json.

        Format reference: https://cocodataset.org/#format-data

        Every image is stored as ``<12-digit id>.jpg`` in ``target_img_path``.
        Its label file is looked up by replacing the last ``/images/`` path
        component with ``/labels/`` and the extension with ``.txt``.

        Args:
            img_paths: Image path strings of the split.
            target_img_path: Destination directory (``Path``) for the images.
            target_json: Path of the annotation json to write.
            mode: Split name, used as the progress bar description.

        Raises:
            FileNotFoundError: If an image or its label file does not exist.
            ValueError: If an image cannot be decoded or a label file yields
                no annotation.
        """
        images = []
        annotations = []
        sa, sb = os.sep + 'images' + os.sep, os.sep + 'labels' + os.sep  # /images/, /labels/ substrings
        for img_id, img_path in enumerate(tqdm(img_paths, desc=mode), 1):
            label_path = sb.join(img_path.rsplit(sa, 1)).rsplit('.', 1)[0] + '.txt'
            img_path = Path(img_path)

            verify_exists(img_path)
            imgsrc = cv2.imread(str(img_path))
            if imgsrc is None:
                # cv2.imread signals an unreadable file by returning None
                # instead of raising.
                raise ValueError(f'{img_path} could not be read as an image')
            height, width = imgsrc.shape[:2]

            dest_file_name = f'{img_id:012d}.jpg'
            save_img_path = target_img_path / dest_file_name

            # JPEG sources are copied untouched; other formats are re-encoded
            # so that the content matches the '.jpg' file name.
            if img_path.suffix.lower() in ('.jpg', '.jpeg'):
                shutil.copyfile(img_path, save_img_path)
            else:
                cv2.imwrite(str(save_img_path), imgsrc)

            images.append({
                'date_captured': '2021',
                'file_name': dest_file_name,
                'id': img_id,
                'height': height,
                'width': width,
            })

            if Path(label_path).exists():
                new_anno = self.read_annotation(label_path, img_id,
                                                height, width)
                if len(new_anno) > 0:
                    annotations.extend(new_anno)
                else:
                    raise ValueError(f'{label_path} is empty')
            else:
                raise FileNotFoundError(f'{label_path} not exists')

        json_data = {
            'info': self.info,
            'images': images,
            'licenses': self.licenses,
            'type': self.type,
            'annotations': annotations,
            'categories': self.categories,
        }
        with open(target_json, 'w', encoding='utf-8') as f:
            json.dump(json_data, f, ensure_ascii=False)

    def read_annotation(self, txt_file, img_id, height, width):
        """Parse one YOLO label file into COCO annotation dicts.

        Args:
            txt_file: Path of the label file; each line is
                ``class cx cy w h`` with whitespace-separated fields.
            img_id: COCO image id the annotations belong to.
            height: Image height in pixels.
            width: Image width in pixels.

        Returns:
            List of annotation dicts; lines with fewer than five fields are
            skipped. ``self.annotation_id`` is advanced once per annotation.
        """
        annotation = []
        all_info = read_txt(txt_file)
        for label_info in all_info:
            # Iterate over the annotated objects of one image. Splitting on
            # any whitespace tolerates tabs, repeated and trailing spaces.
            label_info = label_info.split()
            if len(label_info) < 5:
                continue

            category_id, vertex_info = label_info[0], label_info[1:]
            segmentation, bbox, area = self._get_annotation(vertex_info,
                                                            height, width)
            annotation.append({
                'segmentation': segmentation,
                'area': area,
                'iscrowd': 0,
                'image_id': img_id,
                'bbox': bbox,
                # YOLO class indices are 0-based, category ids start at 1.
                'category_id': int(category_id)+1,
                'id': self.annotation_id,
            })
            self.annotation_id += 1
        return annotation

    @staticmethod
    def _get_annotation(vertex_info, height, width):
        """Convert a normalized YOLO box to COCO segmentation, bbox and area.

        Args:
            vertex_info: Four values convertible to float, read as
                ``cx, cy, w, h`` and scaled by the image width / height.
            height: Image height in pixels.
            width: Image width in pixels.

        Returns:
            Tuple ``(segmentation, bbox, area)``: ``segmentation`` is a
            one-polygon list with the four box corners, ``bbox`` is
            ``[x, y, width, height]`` and ``area`` is the box area, all in
            pixels and clipped to the image.

        Raises:
            ValueError: If ``vertex_info`` does not hold exactly four numbers.
        """
        cx, cy, w, h = [float(i) for i in vertex_info]

        cx = cx * width
        cy = cy * height
        half_w = w * width / 2
        half_h = h * height / 2

        # Clamp every edge independently so that clipping one side neither
        # shifts the opposite side nor leaves the reported size unclipped.
        # left top
        x0 = min(max(cx - half_w, 0.0), float(width))
        y0 = min(max(cy - half_h, 0.0), float(height))

        # right bottom
        x1 = min(max(cx + half_w, 0.0), float(width))
        y1 = min(max(cy + half_h, 0.0), float(height))

        box_w = x1 - x0
        box_h = y1 - y0

        segmentation = [[x0, y0, x1, y0, x1, y1, x0, y1]]
        bbox = [x0, y0, box_w, box_h]
        area = box_w * box_h
        return segmentation, bbox, area
230+
231+
232+
if __name__ == "__main__":
    # Command-line entry point: read the YOLOv5 dataset yaml given by
    # --cfg_file and write the COCO-format copy next to it.
    parser = argparse.ArgumentParser('Datasets converter from YOLOV5 to COCO')
    # The default points at the sample config used in the README example;
    # the previous default ('datasets/YOLOV5') was a directory-style path,
    # not a yaml file.
    parser.add_argument('--cfg_file', type=str,
                        default='dataset/sample.yaml',
                        help='Dataset cfg file')
    args = parser.parse_args()

    converter = YOLOV5CFG2COCO(args.cfg_file)
    converter.generate()

0 commit comments

Comments
 (0)