Skip to content

Commit e7f8760

Browse files
committed
add script to translate yolov5 yaml config file to coco
1 parent dfd1220 commit e7f8760

File tree

1 file changed

+239
-0
lines changed

1 file changed

+239
-0
lines changed

yolov5_cfgfile_2_coco.py

Lines changed: 239 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,239 @@
1+
# !/usr/bin/env python
2+
# -*- encoding: utf-8 -*-
3+
4+
import argparse
5+
import json
6+
from webbrowser import BackgroundBrowser
7+
import yaml
8+
import shutil
9+
import glob
10+
import os
11+
from pathlib import Path
12+
13+
import cv2
14+
from tqdm import tqdm
15+
16+
17+
def read_txt(txt_path):
    """Read a UTF-8 text file and return its lines without trailing newlines.

    Args:
        txt_path: Path of the file to read (anything accepted by ``str()``).

    Returns:
        A list with one string per line; only the trailing ``'\\n'``
        characters are stripped, other whitespace is kept.
    """
    with open(str(txt_path), 'r', encoding='utf-8') as handle:
        return [line.rstrip('\n') for line in handle]
21+
22+
23+
def mkdir(dir_path):
    """Create ``dir_path`` and any missing parents; do nothing if it exists."""
    target_dir = Path(dir_path)
    target_dir.mkdir(parents=True, exist_ok=True)
25+
26+
27+
def verify_exists(file_path):
    """Ensure that ``file_path`` exists on disk.

    Args:
        file_path: Path (string or ``Path``) to check.

    Raises:
        FileNotFoundError: If the path does not exist.
    """
    file_path = Path(file_path)
    if file_path.exists():
        return
    raise FileNotFoundError(f'The {file_path} is not exists!!!')
31+
32+
class YOLOV5CFG2COCO(object):
    """Convert a dataset described by a YOLOv5 data YAML file to COCO layout.

    The YAML file is expected to provide ``train`` / ``val`` (and optionally
    ``test``) entries, ``nc`` and optionally ``names`` and ``path``. The
    output is written next to the YAML file in a directory named
    ``<yaml stem>_COCO_format`` containing ``train2017``, ``val2017`` and
    ``annotations`` sub-directories.
    """

    def __init__(self, cfg_file):
        """Parse ``cfg_file`` and prepare the output directory structure.

        Args:
            cfg_file: Path of the YOLOv5 dataset YAML file.

        Raises:
            KeyError: If ``nc`` is missing from the YAML file.
            AssertionError: If the number of names differs from ``nc``.
        """
        ROOT = Path(cfg_file).resolve().parent
        with open(cfg_file, 'r', encoding="UTF-8") as f:
            data_cfg = yaml.safe_load(f)

        path = Path(data_cfg.get('path') or '')  # optional 'path' defaults to '.'
        if not path.is_absolute():
            path = (ROOT / path).resolve()
        for k in 'train', 'val', 'test':
            if data_cfg.get(k):  # prepend path
                if isinstance(data_cfg[k], str):
                    data_cfg[k] = str(path / data_cfg[k])
                else:
                    data_cfg[k] = [str(path / x) for x in data_cfg[k]]
        if 'names' not in data_cfg:
            # assign class names if missing
            data_cfg['names'] = [f'class{i}' for i in range(data_cfg['nc'])]
        self.train_path, self.val_path, self.test_path = (
            data_cfg.get(x) for x in ('train', 'val', 'test'))
        nc = data_cfg['nc']
        self.names = data_cfg['names']
        assert len(self.names) == nc, \
            f'{len(self.names)} names found for nc={nc} dataset in {cfg_file}'  # check

        # Build the COCO-format directory layout.
        self.dst = ROOT / f"{Path(cfg_file).stem}_COCO_format"
        self.coco_train = "train2017"
        self.coco_val = "val2017"
        self.coco_annotation = "annotations"
        self.coco_train_json = self.dst / self.coco_annotation / \
            f'instances_{self.coco_train}.json'
        self.coco_val_json = self.dst / self.coco_annotation / \
            f'instances_{self.coco_val}.json'

        mkdir(self.dst)
        mkdir(self.dst / self.coco_train)
        mkdir(self.dst / self.coco_val)
        mkdir(self.dst / self.coco_annotation)

        # Build the skeleton of the JSON content.
        self.type = 'instances'
        self.categories = []
        self.annotation_id = 1

        self.info = {
            'year': 2021,
            'version': '1.0',
            'description': 'For object detection',
            'date_created': '2021',
        }

        self.licenses = [{
            'id': 1,
            'name': 'Apache License v2.0',
            'url': 'https://github.com/RapidAI/YOLO2COCO/LICENSE',
        }]

        # Populate the category table up front; otherwise the JSON files
        # would be written with an empty 'categories' list.
        self._get_category()

    def _get_category(self):
        """Rebuild ``self.categories`` from ``self.names``.

        Class ``i`` (0-based position in ``self.names``) gets COCO id
        ``i + 1``; a ``Background`` category with id 0 is appended last.
        The list is rebuilt from scratch so repeated calls do not duplicate
        entries.
        """
        categories = [
            {
                'supercategory': category,
                'id': i,
                'name': category,
            }
            for i, category in enumerate(self.names, 1)
        ]
        categories.append({
            'supercategory': 'Background',
            'id': 0,
            'name': 'Background',
        })
        self.categories = categories

    def generate(self):
        """Convert the train split and, when configured, the val split."""
        self.train_files = self.getfiles(self.train_path)
        # 'val' is optional in the YAML file; skip it instead of crashing.
        self.valid_files = self.getfiles(self.val_path) if self.val_path else []

        train_dest_dir = Path(self.dst) / self.coco_train
        self.gen_dataset(self.train_files, train_dest_dir,
                         self.coco_train_json, mode='train')

        if self.val_path:
            val_dest_dir = Path(self.dst) / self.coco_val
            self.gen_dataset(self.valid_files, val_dest_dir,
                             self.coco_val_json, mode='val')

        print(f"The output directory is: {str(self.dst)}")

    def getfiles(self, path):
        """Collect image file paths from directories and/or list files.

        Args:
            path: A single path or a list of paths. A directory is searched
                recursively; a regular file is read as a text file with one
                image path per line, where a leading ``./`` is resolved
                relative to the list file's directory.

        Returns:
            Sorted list of path strings whose extension is a known image
            format, with ``/`` replaced by ``os.sep``.

        Raises:
            FileNotFoundError: If an entry is neither a directory nor a file.
        """
        IMG_FORMATS = 'bmp', 'dng', 'jpeg', 'jpg', 'mpo', 'png', 'tif', 'tiff', 'webp'  # include image suffixes
        f = []
        for p in path if isinstance(path, list) else [path]:
            p = Path(p)  # os-agnostic
            if p.is_dir():  # dir
                f += glob.glob(str(p / '**' / '*.*'), recursive=True)
            elif p.is_file():  # file
                with open(p) as t:
                    t = t.read().strip().splitlines()
                parent = str(p.parent) + os.sep
                # local to global path; only the leading './' is rewritten
                f += [x.replace('./', parent, 1) if x.startswith('./') else x
                      for x in t]
            else:
                raise FileNotFoundError(f'{p} does not exist')
        im_files = sorted(x.replace('/', os.sep) for x in f
                          if x.split('.')[-1].lower() in IMG_FORMATS)
        return im_files

    def gen_dataset(self, img_paths, target_img_path, target_json, mode):
        """Copy images into the target directory and write the COCO JSON file.

        See https://cocodataset.org/#format-data for the target format.

        Args:
            img_paths: Image path strings; each label file is located by
                swapping the last ``/images/`` path component for
                ``/labels/`` and the extension for ``.txt``.
            target_img_path: Destination directory (``Path``) for the images.
            target_json: Path of the annotation JSON file to write.
            mode: Label shown on the progress bar.

        Raises:
            FileNotFoundError: If an image or its label file is missing.
            ValueError: If an image cannot be decoded or a label file yields
                no annotations.
        """
        images = []
        annotations = []
        # /images/, /labels/ substrings
        sa, sb = os.sep + 'images' + os.sep, os.sep + 'labels' + os.sep
        for img_id, img_path in enumerate(tqdm(img_paths, desc=mode), 1):
            label_path = sb.join(img_path.rsplit(sa, 1)).rsplit('.', 1)[0] + '.txt'
            img_path = Path(img_path)

            verify_exists(img_path)
            imgsrc = cv2.imread(str(img_path))
            if imgsrc is None:
                # cv2.imread signals a decode failure by returning None.
                raise ValueError(f'{img_path} could not be read as an image')
            height, width = imgsrc.shape[:2]

            dest_file_name = f'{img_id:012d}.jpg'
            save_img_path = target_img_path / dest_file_name

            # JPEG sources are copied verbatim; everything else is re-encoded
            # so that the '.jpg' destination name matches the content.
            if img_path.suffix.lower() == ".jpg":
                shutil.copyfile(img_path, save_img_path)
            else:
                cv2.imwrite(str(save_img_path), imgsrc)

            images.append({
                'date_captured': '2021',
                'file_name': dest_file_name,
                'id': img_id,
                'height': height,
                'width': width,
            })

            if not Path(label_path).exists():
                raise FileNotFoundError(f'{label_path} not exists')
            new_anno = self.read_annotation(label_path, img_id,
                                            height, width)
            if not new_anno:
                raise ValueError(f'{label_path} is empty')
            annotations.extend(new_anno)

        json_data = {
            'info': self.info,
            'images': images,
            'licenses': self.licenses,
            'type': self.type,
            'annotations': annotations,
            'categories': self.categories,
        }
        with open(target_json, 'w', encoding='utf-8') as f:
            json.dump(json_data, f, ensure_ascii=False)

    def read_annotation(self, txt_file, img_id, height, width):
        """Parse one YOLO label file into a list of COCO annotation dicts.

        Args:
            txt_file: Path of the label file; each line is
                ``class cx cy w h`` separated by whitespace.
            img_id: COCO id of the image the labels belong to.
            height: Image height in pixels.
            width: Image width in pixels.

        Returns:
            A list of annotation dicts. Lines with fewer than five fields
            are skipped. ``self.annotation_id`` is advanced once per
            annotation.
        """
        annotation = []
        all_info = read_txt(txt_file)
        for label_info in all_info:
            # Iterate over the labelled objects of one image. split() without
            # an argument tolerates tabs and repeated or trailing spaces.
            label_info = label_info.split()
            if len(label_info) < 5:
                continue

            category_id, vertex_info = label_info[0], label_info[1:]
            segmentation, bbox, area = self._get_annotation(vertex_info,
                                                            height, width)
            annotation.append({
                'segmentation': segmentation,
                'area': area,
                'iscrowd': 0,
                'image_id': img_id,
                'bbox': bbox,
                # YOLO class indices are 0-based; id 0 is 'Background' here.
                'category_id': int(category_id) + 1,
                'id': self.annotation_id,
            })
            self.annotation_id += 1
        return annotation

    @staticmethod
    def _get_annotation(vertex_info, height, width):
        """Convert a normalised ``cx, cy, w, h`` box to COCO pixel values.

        Args:
            vertex_info: Four numeric strings ``cx, cy, w, h`` relative to
                the image size.
            height: Image height in pixels.
            width: Image width in pixels.

        Returns:
            A ``(segmentation, bbox, area)`` tuple where ``segmentation`` is
            a single rectangle polygon, ``bbox`` is ``[x, y, w, h]`` and
            ``area`` is ``w * h``. All three describe the box after clipping
            it to the image bounds.

        Raises:
            ValueError: If ``vertex_info`` does not hold exactly four numbers.
        """
        cx, cy, w, h = [float(i) for i in vertex_info]

        cx = cx * width
        cy = cy * height
        box_w = w * width
        box_h = h * height

        # left top, clipped to the image
        x0 = max(cx - box_w / 2, 0)
        y0 = max(cy - box_h / 2, 0)

        # right bottom, derived from the centre so that clipping the
        # top-left corner does not shift it, then clipped to the image
        x1 = min(cx + box_w / 2, width)
        y1 = min(cy + box_h / 2, height)

        # Report the clipped extent so bbox, area and polygon agree.
        clipped_w = max(x1 - x0, 0)
        clipped_h = max(y1 - y0, 0)

        segmentation = [[x0, y0, x1, y0, x1, y1, x0, y1]]
        bbox = [x0, y0, clipped_w, clipped_h]
        area = clipped_w * clipped_h
        return segmentation, bbox, area
229+
230+
231+
if __name__ == "__main__":
    # Command-line entry point: convert the dataset described by --cfg_file.
    # The title is passed as ``description``; the first positional parameter
    # of ArgumentParser is ``prog``, which would replace the program name in
    # the usage line.
    parser = argparse.ArgumentParser(
        description='Datasets converter from YOLOV5 to COCO')
    parser.add_argument('--cfg_file', type=str,
                        default='datasets/YOLOV5',
                        help='Dataset cfg file')
    args = parser.parse_args()

    converter = YOLOV5CFG2COCO(args.cfg_file)
    converter.generate()

0 commit comments

Comments
 (0)