Skip to content

Commit c9f3d58

Browse files
committed
release flickr1M fn
1 parent 194f6b6 commit c9f3d58

File tree

2 files changed

+93
-4
lines changed

2 files changed

+93
-4
lines changed

docs/modules/files.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ sake of cross-platform.
4141
load_nietzsche_dataset
4242
load_wmt_en_fr_dataset
4343
load_flickr25k_dataset
44+
load_flickr1M_dataset
4445

4546
save_npz
4647
save_npz_dict
@@ -101,6 +102,9 @@ Flickr25k
101102
^^^^^^^^^^^^^^^^^^^^^^^^^
102103
.. autofunction:: load_flickr25k_dataset
103104

105+
Flickr1M
106+
^^^^^^^^^^^^^^^^^^^^^^^^^
107+
.. autofunction:: load_flickr1M_dataset
104108

105109

106110
Load and save network

tensorlayer/files.py

Lines changed: 89 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -513,15 +513,15 @@ def load_flickr25k_dataset(tag='sky', path="data/flickr25k", n_threads=50, print
513513
If you want to get images with tag, use string like 'dog', 'red', see `Flickr Search <https://www.flickr.com/search/>`_.
514514
If you want to get all images, set to ``None``.
515515
path : string
516-
The path that the data is downloaded to, defaults is ``data/flickr25k/``
516+
The path that the data is downloaded to, defaults to ``data/flickr25k/``.
517517
n_threads : int, number of threads used to read images.
518518
printable : bool, print information when reading images, default is False.
519519
520520
Examples
521521
-----------
522522
- Get images with tag of sky
523523
>>> images = tl.files.load_flickr25k_dataset(tag='sky')
524-
524+
525525
- Get all images
526526
>>> images = tl.files.load_flickr25k_dataset(tag=None, n_threads=100, printable=True)
527527
"""
@@ -534,12 +534,12 @@ def load_flickr25k_dataset(tag='sky', path="data/flickr25k", n_threads=50, print
534534
del_file(path+'/'+filename)
535535
## return images by the given tag.
536536
# 1. image path list
537-
folder_imgs = "data/flickr25k/mirflickr"
537+
folder_imgs = path+"/mirflickr"
538538
path_imgs = load_file_list(path=folder_imgs, regx='\\.jpg', printable=False)
539539
path_imgs.sort(key=natural_keys)
540540
# print(path_imgs[0:10])
541541
# 2. tag path list
542-
folder_tags = "data/flickr25k/mirflickr/meta/tags"
542+
folder_tags = path+"/mirflickr/meta/tags"
543543
path_tags = load_file_list(path=folder_tags, regx='\\.txt', printable=False)
544544
path_tags.sort(key=natural_keys)
545545
# print(path_tags[0:10])
@@ -559,6 +559,91 @@ def load_flickr25k_dataset(tag='sky', path="data/flickr25k", n_threads=50, print
559559
return images
560560

561561

562+
563+
def load_flickr1M_dataset(tag='sky', size=10, path="data/flickr1M", n_threads=50, printable=False):
    """Returns a list of images by a given tag from Flickr1M dataset,
    it will download Flickr1M from `the official website <http://press.liacs.nl/mirflickr/mirdownload.html>`_
    at the first time you use it.

    Parameters
    ------------
    tag : string or None
        If you want to get images with tag, use string like 'dog', 'red', see `Flickr Search <https://www.flickr.com/search/>`_.
        If you want to get all images, set to ``None``.
    size : int, 1 to 10
        1 means 100k images ... 5 means 500k images, 10 means all 1 million images. Default is 10.
    path : string
        The path that the data is downloaded to, defaults to ``data/flickr1M/``.
    n_threads : int
        Number of threads used to read images.
    printable : bool
        Print information when reading images, default is False.

    Raises
    ------------
    ValueError
        If ``size`` is not between 1 and 10 (inclusive).

    Examples
    -----------
    - Get images with tag of zebra, using the first 100k images only
    >>> images = tl.files.load_flickr1M_dataset(tag='zebra', size=1)

    - Get all images of the first 200k
    >>> images = tl.files.load_flickr1M_dataset(tag=None, size=2, n_threads=100, printable=True)
    """
    # Local import: the module-level import block is outside this block.
    import shutil

    # Reject out-of-range sizes up front; otherwise the slices below silently
    # select the wrong archives or fail later on a missing ``images<N>`` folder.
    if not 1 <= size <= 10:
        raise ValueError("size must be an integer from 1 to 10, got {!r}".format(size))

    print("[Flickr1M] using {}% of images = {}".format(size*10, size*100000))
    images_zip = ['images%d.zip' % i for i in range(10)]
    tag_zip = 'tags.zip'
    url = 'http://press.liacs.nl/mirflickr/mirflickr1m/'

    ## download dataset
    for image_zip in images_zip[0:size]:
        image_folder = image_zip.split(".")[0]
        if folder_exists(path+"/"+image_folder) is False:
            print("[Flickr1M] {} is missing in {}".format(image_folder, path))
            maybe_download_and_extract(image_zip, path, url, extract=True)
            del_file(path+'/'+image_zip)
            # The freshly extracted ``images`` folder is renamed to
            # ``images<N>`` so that the next archive does not collide with it.
            # shutil.move avoids building a shell command from ``path``.
            shutil.move(path+'/images', path+'/'+image_folder)
        else:
            print("[Flickr1M] {} exists in {}".format(image_folder, path))

    ## download tag
    if folder_exists(path+"/tags") is False:
        print("[Flickr1M] tag files is nonexistent in {}".format(path))
        maybe_download_and_extract(tag_zip, path, url, extract=True)
        del_file(path+'/'+tag_zip)
    else:
        print("[Flickr1M] tags exists in {}".format(path))

    ## 1. image path list
    images_list = []
    images_folder_list = []
    for i in range(0, size):
        images_folder_list += load_folder_list(path=path+'/images%d' % i)
    # Sub-folders are named by number; sort numerically, not lexically.
    images_folder_list.sort(key=lambda s: int(os.path.basename(s)))  # folder/images/ddd
    # NOTE(review): the ``size*10`` cap presumably reflects 10 sub-folders per
    # archive -- confirm against the dataset layout.
    for folder in images_folder_list[0:size*10]:
        tmp = load_file_list(path=folder, regx='\\.jpg', printable=False)
        tmp.sort(key=lambda s: int(s.split('.')[-2]))  # ddd.jpg
        images_list.extend([folder+'/'+x for x in tmp])

    ## 2. tag path list
    tag_list = []
    tag_folder_list = load_folder_list(path+"/tags")
    tag_folder_list.sort(key=lambda s: int(os.path.basename(s)))  # folder/tags/ddd
    for folder in tag_folder_list[0:size*10]:
        tmp = load_file_list(path=folder, regx='\\.txt', printable=False)
        tmp.sort(key=lambda s: int(s.split('.')[-2]))  # ddd.txt
        tag_list += [folder+'/'+s for s in tmp]

    ## 3. select images
    # Tag file ``idx`` is paired with image ``idx``; both lists were built
    # with the same numeric ordering above.
    print("[Flickr1M] searching tag: {}".format(tag))
    select_images_list = []
    for idx, tag_file in enumerate(tag_list):
        # ``tag=None`` means "all images": short-circuit so the tag file is
        # not even read in that case.
        if tag is None or tag in read_file(tag_file).split('\n'):
            select_images_list.append(images_list[idx])

    print("[Flickr1M] reading images with tag: {}".format(tag))
    images = visualize.read_images(select_images_list, '', n_threads=n_threads, printable=printable)
    return images
644+
645+
646+
562647
## Load and save network
563648
def save_npz(save_list=[], name='model.npz', sess=None):
564649
"""Input parameters and the file name, save parameters into .npz file. Use tl.utils.load_npz() to restore.

0 commit comments

Comments
 (0)