Skip to content

Commit da728b6

Browse files
committed
add docs for image aug - object detection
1 parent cce8ad2 commit da728b6

File tree

2 files changed

+67
-2
lines changed

2 files changed

+67
-2
lines changed

docs/modules/prepro.rst

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,45 @@ Greyscale erosion
220220
Object detection
221221
-------------------
222222

223+
Tutorial for Image Aug
224+
^^^^^^^^^^^^^^^^^^^^^^^
225+
226+
Here is an example of image augmentation on the VOC dataset.
227+
228+
.. code-block:: python
229+
230+
## download the VOC dataset
231+
imgs_file_list, imgs_semseg_file_list, imgs_insseg_file_list, imgs_ann_file_list, \
232+
classes, classes_in_person, classes_dict,\
233+
n_objs_list, objs_info_list, objs_info_dicts = tl.files.load_voc_dataset(dataset="2012", contain_classes_in_person=False)
234+
235+
## parse the annotation into list format
236+
ann_list = []
237+
for info in objs_info_list:
238+
ann = tl.prepro.parse_darknet_ann_str_to_list(info)
239+
c, b = tl.prepro.parse_darknet_ann_list_to_cls_box(ann)
240+
ann_list.append([c, b])
241+
242+
## different types of image augmentation
243+
idx = 26  # index of the example image to augment; choose any valid index
image = tl.vis.read_image(imgs_file_list[idx])
244+
tl.vis.draw_boxes_and_labels_to_image(image, ann_list[idx][0], ann_list[idx][1], [], classes, True, save_name='_im_original.png')
245+
246+
im_flip, coords = tl.prepro.obj_box_left_right_flip(image, coords=ann_list[idx][1], is_rescale=True, is_center=True, is_random=False)
247+
tl.vis.draw_boxes_and_labels_to_image(im_flip, ann_list[idx][0], coords, [], classes, True, save_name='_im_flip.png')
248+
249+
im_resize, coords = tl.prepro.obj_box_imresize(image, coords=ann_list[idx][1], size=[300, 200], is_rescale=True)
250+
tl.vis.draw_boxes_and_labels_to_image(im_resize, ann_list[idx][0], coords, [], classes, True, save_name='_im_resize.png')
251+
252+
im_crop, clas, coords = tl.prepro.obj_box_crop(image, classes=ann_list[idx][0], coords=ann_list[idx][1], wrg=200, hrg=200, is_rescale=True, is_center=True, is_random=False)
253+
tl.vis.draw_boxes_and_labels_to_image(im_crop, clas, coords, [], classes, True, save_name='_im_crop.png')
254+
255+
im_shift, clas, coords = tl.prepro.obj_box_shift(image, classes=ann_list[idx][0], coords=ann_list[idx][1], wrg=0.1, hrg=0.1, is_rescale=True, is_center=True, is_random=False)
256+
tl.vis.draw_boxes_and_labels_to_image(im_shift, clas, coords, [], classes, True, save_name='_im_shift.png')
257+
258+
im_zoom, clas, coords = tl.prepro.obj_box_zoom(image, classes=ann_list[idx][0], coords=ann_list[idx][1], zoom_range=(1.3, 0.7), is_rescale=True, is_center=True, is_random=False)
259+
tl.vis.draw_boxes_and_labels_to_image(im_zoom, clas, coords, [], classes, True, save_name='_im_zoom.png')
260+
261+
223262
Coordinate pixel unit to percentage
224263
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
225264
.. autofunction:: obj_box_coord_rescale

tensorlayer/files.py

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -708,10 +708,10 @@ def load_celebA_dataset(dirpath='data'):
708708
return data_files
709709

710710
def load_voc_dataset(path='data/VOC', dataset='2012', contain_classes_in_person=False):
711-
"""Pascal VOC 2012 Dataset has 20 objects ``"aeroplane", "bicycle", "bird",
711+
"""Pascal VOC 2012 Dataset has 20 objects ``"aeroplane", "bicycle", "bird",
712712
"boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable",
713713
"dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa",
714-
"train", "tvmonitor"`` and additional 3 classes ``"head", "hand", "foot"``
714+
"train", "tvmonitor"`` and additional 3 classes ``"head", "hand", "foot"``
715715
for person.
716716
717717
Parameters
@@ -746,6 +746,32 @@ def load_voc_dataset(path='data/VOC', dataset='2012', contain_classes_in_person=
746746
``{imgs_file_list : dictionary for annotation}``, the annotation of all images in ``imgs_file_list``,
747747
format from `TensorFlow/Models/object-detection <https://github.com/tensorflow/models/blob/master/object_detection/create_pascal_tf_record.py>`_.
748748
749+
Examples
750+
----------
751+
>>> imgs_file_list, imgs_semseg_file_list, imgs_insseg_file_list, imgs_ann_file_list, \
752+
... classes, classes_in_person, classes_dict,\
753+
... n_objs_list, objs_info_list, objs_info_dicts = tl.files.load_voc_dataset(dataset="2012", contain_classes_in_person=False)
754+
>>> idx = 26
755+
>>> print(classes)
756+
... ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor']
757+
>>> print(classes_dict)
758+
... {'sheep': 16, 'horse': 12, 'bicycle': 1, 'bottle': 4, 'cow': 9, 'sofa': 17, 'car': 6, 'dog': 11, 'cat': 7, 'person': 14, 'train': 18, 'diningtable': 10, 'aeroplane': 0, 'bus': 5, 'pottedplant': 15, 'tvmonitor': 19, 'chair': 8, 'bird': 2, 'boat': 3, 'motorbike': 13}
759+
>>> print(imgs_file_list[idx])
760+
... data/VOC/VOC2012/JPEGImages/2007_000423.jpg
761+
>>> print(n_objs_list[idx])
762+
... 2
763+
>>> print(imgs_ann_file_list[idx])
764+
... data/VOC/VOC2012/Annotations/2007_000423.xml
765+
>>> print(objs_info_list[idx])
766+
... 14 0.173 0.461333333333 0.142 0.496
767+
... 14 0.828 0.542666666667 0.188 0.594666666667
768+
>>> ann = tl.prepro.parse_darknet_ann_str_to_list(objs_info_list[idx])
769+
>>> print(ann)
770+
... [[14, 0.173, 0.461333333333, 0.142, 0.496], [14, 0.828, 0.542666666667, 0.188, 0.594666666667]]
771+
>>> c, b = tl.prepro.parse_darknet_ann_list_to_cls_box(ann)
772+
>>> print(c, b)
773+
... [14, 14] [[0.173, 0.461333333333, 0.142, 0.496], [0.828, 0.542666666667, 0.188, 0.594666666667]]
774+
749775
References
750776
-------------
751777
- `Pascal VOC2012 Website <http://host.robots.ox.ac.uk/pascal/VOC/voc2012/#devkit>`_.

0 commit comments

Comments
 (0)