Commit cce8ad2

[release] APIs for download VOC dataset and Image Aug for Object Detection

1 parent 069a3ed commit cce8ad2

File tree

8 files changed: 1038 additions & 10 deletions


docs/modules/files.rst

Lines changed: 5 additions & 0 deletions
@@ -45,6 +45,7 @@ sake of cross-platform.
   load_flickr1M_dataset
   load_cyclegan_dataset
   load_celebA_dataset
+  load_voc_dataset
   download_file_from_google_drive

   save_npz
@@ -121,6 +122,10 @@ CelebA
 ^^^^^^^^^
 .. autofunction:: load_celebA_dataset

+VOC 2007/2012
+^^^^^^^^^^^^^^^^
+.. autofunction:: load_voc_dataset
+
 Google Drive
 ^^^^^^^^^^^^^^^^
 .. autofunction:: download_file_from_google_drive
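
For orientation, a minimal usage sketch of the loader documented above (not part of the commit; the argument names and the unpacking order follow the docstring added in tensorlayer/files.py further down):

    import tensorlayer as tl

    # Download/extract Pascal VOC 2012 under data/VOC and collect file lists plus annotations.
    imgs_file_list, imgs_semseg_file_list, imgs_insseg_file_list, imgs_ann_file_list, \
        classes, classes_in_person, classes_dict, \
        n_objs_list, objs_info_list, objs_info_dicts = tl.files.load_voc_dataset(
            path='data/VOC', dataset='2012', contain_classes_in_person=False)

    print(classes_dict)       # class label -> integer id
    print(objs_info_list[0])  # Darknet-format string: one "class_id x_centre y_centre width height" line per object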

docs/modules/prepro.rst

Lines changed: 79 additions & 0 deletions
@@ -63,6 +63,25 @@ Some of the code in this package are borrowed from Keras.
   binary_erosion
   erosion

+
+  obj_box_coord_rescale
+  obj_box_coords_rescale
+  obj_box_coord_scale_to_pixelunit
+  obj_box_coord_centroid_to_upleft_butright
+  obj_box_coord_centroid_to_upleft
+  obj_box_coord_upleft_to_centroid
+
+  parse_darknet_ann_str_to_list
+  parse_darknet_ann_list_to_cls_box
+
+  obj_box_left_right_flip
+  obj_box_imresize
+  obj_box_crop
+  obj_box_shift
+  obj_box_zoom
+
+
+
   pad_sequences
   remove_pad_sequences
   process_sequences
@@ -197,6 +216,66 @@ Greyscale erosion
 .. autofunction:: erosion


+
+Object detection
+-------------------
+
+Coordinate pixel unit to percentage
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+.. autofunction:: obj_box_coord_rescale
+
+Coordinates pixel unit to percentage
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+.. autofunction:: obj_box_coords_rescale
+
+Coordinate percentage to pixel unit
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+.. autofunction:: obj_box_coord_scale_to_pixelunit
+
+Coordinate [x_center, y_center, w, h] to up-left and bottom-right
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+.. autofunction:: obj_box_coord_centroid_to_upleft_butright
+
+Coordinate [x_center, y_center, w, h] to up-left-width-height
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+.. autofunction:: obj_box_coord_centroid_to_upleft
+
+Coordinate up-left-width-height to [x_center, y_center, w, h]
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+.. autofunction:: obj_box_coord_upleft_to_centroid
+
+Darknet format string to list
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+.. autofunction:: parse_darknet_ann_str_to_list
+
+Darknet format: split class and coordinate
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+.. autofunction:: parse_darknet_ann_list_to_cls_box
+
+Image Aug - Flip
+^^^^^^^^^^^^^^^^^^^^^^^^^
+.. autofunction:: obj_box_left_right_flip
+
+Image Aug - Resize
+^^^^^^^^^^^^^^^^^^^^^^^^^
+.. autofunction:: obj_box_imresize
+
+Image Aug - Crop
+^^^^^^^^^^^^^^^^^^^^^^^^^
+.. autofunction:: obj_box_crop
+
+Image Aug - Shift
+^^^^^^^^^^^^^^^^^^^^^^^^^
+.. autofunction:: obj_box_shift
+
+Image Aug - Zoom
+^^^^^^^^^^^^^^^^^^^^^^^^^
+.. autofunction:: obj_box_zoom
+
+
+
+
+
 Sequence
 ---------
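
The headings above describe conversions between bounding-box formats. As a rough illustration of the arithmetic behind them (a sketch only, not the tl.prepro implementation; the real functions' signatures are not shown in this diff):

    # Illustrative sketch only -- not the tl.prepro functions themselves.

    def centroid_to_upleft_butright(coord):
        """[x_center, y_center, w, h] -> [x_min, y_min, x_max, y_max]."""
        x, y, w, h = coord
        return [x - w / 2.0, y - h / 2.0, x + w / 2.0, y + h / 2.0]

    def upleft_to_centroid(coord):
        """[x_min, y_min, w, h] -> [x_center, y_center, w, h]."""
        x, y, w, h = coord
        return [x + w / 2.0, y + h / 2.0, w, h]

    def coord_pixel_to_ratio(coord, im_size):
        """[x, y, w, h] in pixels -> the same box as ratios of the image size; im_size is (height, width)."""
        im_h, im_w = im_size
        x, y, w, h = coord
        return [x / im_w, y / im_h, w / im_w, h / im_h]

    # A 40 x 30 box centred at (100, 60) in a 200 x 120 (width x height) image:
    centroid_to_upleft_butright([100, 60, 40, 30])       # [80.0, 45.0, 120.0, 75.0]
    coord_pixel_to_ratio([100, 60, 40, 30], (120, 200))  # [0.5, 0.5, 0.2, 0.25]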

docs/modules/utils.rst

Lines changed: 6 additions & 1 deletion
@@ -12,6 +12,7 @@ API - Utility
   class_balancing_oversample
   get_random_int
   dict_to_one
+  list_string_to_dict
   flatten_list

 Training, testing and predicting
@@ -41,13 +42,17 @@ Random functions
 ----------------------------
 .. autofunction:: get_random_int

-Helper functions
+Dictionary
 --------------------

 Set all items in dictionary to one
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 .. autofunction:: dict_to_one

+Convert a list of strings to a dictionary
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+.. autofunction:: list_string_to_dict
+
 Flatten a list
 ^^^^^^^^^^^^^^^^^^^
 .. autofunction:: flatten_list
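
The new list_string_to_dict helper is used by load_voc_dataset below as classes_dict = utils.list_string_to_dict(classes) and is documented there as "class label to integer". A minimal sketch of that behaviour (the actual implementation in tensorlayer/utils.py is not part of this diff and may differ):

    def list_string_to_dict_sketch(string):
        """Map each string in a list to its position, e.g. ['a', 'b'] -> {'a': 0, 'b': 1}."""
        return {s: idx for idx, s in enumerate(string)}

    list_string_to_dict_sketch(["aeroplane", "bicycle", "bird"])
    # {'aeroplane': 0, 'bicycle': 1, 'bird': 2}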

tensorlayer/files.py

Lines changed: 212 additions & 0 deletions
@@ -12,6 +12,7 @@
 import zipfile
 from . import visualize
 from . import nlp
+from . import utils
 import pickle
 from six.moves import urllib
 from six.moves import cPickle
@@ -706,6 +707,217 @@ def load_celebA_dataset(dirpath='data'):
         data_files[i] = os.path.join(image_path, data_files[i])
     return data_files

+def load_voc_dataset(path='data/VOC', dataset='2012', contain_classes_in_person=False):
+    """Pascal VOC 2007/2012 Dataset has 20 objects ``"aeroplane", "bicycle", "bird",
+    "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable",
+    "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa",
+    "train", "tvmonitor"`` and 3 additional classes ``"head", "hand", "foot"``
+    for person.
+
+    Parameters
+    -----------
+    path : string
+        The path that the data is downloaded to, defaults to ``data/VOC``.
+    dataset : string, 2012 or 2007
+        The VOC dataset version.
+    contain_classes_in_person : boolean
+        If True, the dataset will also contain the labels of head, hand and foot.
+
+    Returns
+    ---------
+    imgs_file_list : list of string.
+        Full paths of all images.
+    imgs_semseg_file_list : list of string.
+        Full paths of all maps for semantic segmentation. Note that not all images have this map!
+    imgs_insseg_file_list : list of string.
+        Full paths of all maps for instance segmentation. Note that not all images have this map!
+    imgs_ann_file_list : list of string.
+        Full paths of all annotations for bounding box and object class; all images have these annotations.
+    classes : list of string.
+        Classes in order.
+    classes_in_person : list of string.
+        Classes in person.
+    classes_dict : dictionary.
+        Class label to integer.
+    n_objs_list : list of integer
+        Number of objects in all images in ``imgs_file_list`` in order.
+    objs_info_list : list of string.
+        Darknet format for the annotation of all images in ``imgs_file_list`` in order. ``[class_id x_centre y_centre width height]`` in ratio format.
+    objs_info_dicts : dictionary.
+        ``{imgs_file_list : dictionary for annotation}``, the annotation of all images in ``imgs_file_list``,
+        format from `TensorFlow/Models/object-detection <https://github.com/tensorflow/models/blob/master/object_detection/create_pascal_tf_record.py>`_.
+
+    References
+    -------------
+    - `Pascal VOC2012 Website <http://host.robots.ox.ac.uk/pascal/VOC/voc2012/#devkit>`_.
+    - `Pascal VOC2007 Website <http://host.robots.ox.ac.uk/pascal/VOC/voc2007/>`_.
+    - `TensorFlow/Models/object-detection <https://github.com/zsdonghao/object-detection/blob/master/g3doc/preparing_inputs.md>`_.
+    """
+
+    def _recursive_parse_xml_to_dict(xml):
+        """Recursively parses XML contents to python dict.
+        We assume that `object` tags are the only ones that can appear
+        multiple times at the same level of a tree.
+
+        Args:
+            xml: xml tree obtained by parsing XML file contents using lxml.etree
+
+        Returns:
+            Python dictionary holding XML contents.
+        """
+        if not xml:  # leaf node (no children): return its tag and text
+            return {xml.tag: xml.text}
+        result = {}
+        for child in xml:
+            child_result = _recursive_parse_xml_to_dict(child)
+            if child.tag != 'object':
+                result[child.tag] = child_result[child.tag]
+            else:
+                if child.tag not in result:
+                    result[child.tag] = []
+                result[child.tag].append(child_result[child.tag])
+        return {xml.tag: result}
+
+    from lxml import etree              # pip install lxml
+    import xml.etree.ElementTree as ET
+
+    ##
+    if dataset == "2012":
+        url = "http://host.robots.ox.ac.uk/pascal/VOC/voc2012/"
+        tar_filename = "VOCtrainval_11-May-2012.tar"
+        extracted_filename = "VOC2012"  # "VOCdevkit/VOC2012"
+        print(" [============= VOC 2012 =============]")
+    elif dataset == "2007":
+        url = "http://host.robots.ox.ac.uk/pascal/VOC/voc2007/"
+        tar_filename = "VOCtrainval_06-Nov-2007.tar"
+        extracted_filename = "VOC2007"
+        print(" [============= VOC 2007 =============]")
+    else:
+        raise Exception("Please set the dataset to either 2012 or 2007.")
+
+    ##======== download dataset
+    if folder_exists(path+"/"+extracted_filename) is False:
+        print("[VOC] {} is nonexistent in {}".format(extracted_filename, path))
+        maybe_download_and_extract(tar_filename, path, url, extract=True)
+        del_file(path+'/'+tar_filename)
+        if dataset == "2012":
+            os.system("mv {}/VOCdevkit/VOC2012 {}/VOC2012".format(path, path))
+        elif dataset == "2007":
+            os.system("mv {}/VOCdevkit/VOC2007 {}/VOC2007".format(path, path))
+        del_folder(path+'/VOCdevkit')
+    ##======== object classes (labels)  NOTE: YOU CAN CUSTOMIZE THIS LIST
+    classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car",
+               "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike",
+               "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
+    if contain_classes_in_person:
+        classes_in_person = ["head", "hand", "foot"]
+    else:
+        classes_in_person = []
+
+    classes += classes_in_person  # use extra 3 classes for person
+
+    classes_dict = utils.list_string_to_dict(classes)
+    print("[VOC] object classes {}".format(classes_dict))
+
+    ##======== 1. image path list
+    folder_imgs = path+"/"+extracted_filename+"/JPEGImages/"
+    imgs_file_list = load_file_list(path=folder_imgs, regx='\\.jpg', printable=False)
+    print("[VOC] {} images found".format(len(imgs_file_list)))
+    imgs_file_list.sort(key=lambda s: int(s.replace('.', ' ').replace('_', '').split(' ')[-2]))  # 2007_000027.jpg --> 2007000027
+    imgs_file_list = [folder_imgs+s for s in imgs_file_list]
+    # print('IM', imgs_file_list[0::3333], imgs_file_list[-1])
+    ##======== 2. semantic segmentation maps path list
+    folder_semseg = path+"/"+extracted_filename+"/SegmentationClass/"
+    imgs_semseg_file_list = load_file_list(path=folder_semseg, regx='\\.png', printable=False)
+    print("[VOC] {} maps for semantic segmentation found".format(len(imgs_semseg_file_list)))
+    imgs_semseg_file_list.sort(key=lambda s: int(s.replace('.', ' ').replace('_', '').split(' ')[-2]))  # 2007_000032.png --> 2007000032
+    imgs_semseg_file_list = [folder_semseg+s for s in imgs_semseg_file_list]
+    # print('Semantic Seg IM', imgs_semseg_file_list[0::333], imgs_semseg_file_list[-1])
+    ##======== 3. instance segmentation maps path list
+    folder_insseg = path+"/"+extracted_filename+"/SegmentationObject/"
+    imgs_insseg_file_list = load_file_list(path=folder_insseg, regx='\\.png', printable=False)
+    print("[VOC] {} maps for instance segmentation found".format(len(imgs_insseg_file_list)))
+    imgs_insseg_file_list.sort(key=lambda s: int(s.replace('.', ' ').replace('_', '').split(' ')[-2]))  # 2007_000032.png --> 2007000032
+    imgs_insseg_file_list = [folder_insseg+s for s in imgs_insseg_file_list]
+    # print('Instance Seg IM', imgs_insseg_file_list[0::333], imgs_insseg_file_list[-1])
+    ##======== 4. annotations for bounding box and object class
+    folder_ann = path+"/"+extracted_filename+"/Annotations/"
+    imgs_ann_file_list = load_file_list(path=folder_ann, regx='\\.xml', printable=False)
+    print("[VOC] {} XML annotation files for bounding box and object class found".format(len(imgs_ann_file_list)))
+    imgs_ann_file_list.sort(key=lambda s: int(s.replace('.', ' ').replace('_', '').split(' ')[-2]))  # 2007_000027.xml --> 2007000027
+    imgs_ann_file_list = [folder_ann+s for s in imgs_ann_file_list]
+    # print('ANN', imgs_ann_file_list[0::3333], imgs_ann_file_list[-1])
+    ##======== parse XML annotations
+    def convert(size, box):
+        """Convert a VOC box (xmin, xmax, ymin, ymax) in pixels into the Darknet
+        format (x_centre, y_centre, width, height) as ratios of the image size."""
+        dw = 1./size[0]
+        dh = 1./size[1]
+        x = (box[0] + box[1])/2.0
+        y = (box[2] + box[3])/2.0
+        w = box[1] - box[0]
+        h = box[3] - box[2]
+        x = x*dw
+        w = w*dw
+        y = y*dh
+        h = h*dh
+        return (x, y, w, h)
+
+    def convert_annotation(file_name):
+        """ Given VOC2012 XML Annotations, returns number of objects and info. """
+        in_file = open(file_name)
+        out_file = ""
+        tree = ET.parse(in_file)
+        root = tree.getroot()
+        size = root.find('size')
+        w = int(size.find('width').text)
+        h = int(size.find('height').text)
+        n_objs = 0
+
+        for obj in root.iter('object'):
+            difficult = obj.find('difficult').text
+            cls = obj.find('name').text
+            if cls not in classes or int(difficult) == 1:
+                continue
+            cls_id = classes.index(cls)
+            xmlbox = obj.find('bndbox')
+            b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
+            bb = convert((w, h), b)
+            # out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
+            out_file += str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n'
+            n_objs += 1
+            if cls == "person":
+                for part in obj.iter('part'):
+                    cls = part.find('name').text
+                    if cls not in classes_in_person:
+                        continue
+                    cls_id = classes.index(cls)
+                    xmlbox = part.find('bndbox')
+                    b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
+                    bb = convert((w, h), b)
+                    # out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
+                    out_file += str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n'
+                    n_objs += 1
+        in_file.close()
+        return n_objs, out_file
+
+    print("[VOC] Parsing xml annotations files")
+    n_objs_list = []
+    objs_info_list = []  # Darknet Format list of string
+    objs_info_dicts = {}
+    for idx, ann_file in enumerate(imgs_ann_file_list):
+        n_objs, objs_info = convert_annotation(ann_file)
+        n_objs_list.append(n_objs)
+        objs_info_list.append(objs_info)
+        with tf.gfile.GFile(ann_file, 'r') as fid:
+            xml_str = fid.read()
+        xml = etree.fromstring(xml_str)
+        data = _recursive_parse_xml_to_dict(xml)['annotation']
+        objs_info_dicts.update({imgs_file_list[idx]: data})
+
+    return imgs_file_list, imgs_semseg_file_list, imgs_insseg_file_list, imgs_ann_file_list, \
+           classes, classes_in_person, classes_dict, \
+           n_objs_list, objs_info_list, objs_info_dicts
+
+
 ## Load and save network list npz
 def save_npz(save_list=[], name='model.npz', sess=None):
     """Input parameters and the file name, save parameters into .npz file. Use tl.utils.load_npz() to restore.

tensorlayer/layers.py

File mode changed: 100755 → 100644.
