Skip to content

Commit bcdab79

Browse files
committed
release flickr and files fun
1 parent 50304da commit bcdab79

File tree

4 files changed

+178
-37
lines changed

4 files changed

+178
-37
lines changed

docs/modules/files.rst

Lines changed: 46 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ sake of cross-platform.
4040
load_imdb_dataset
4141
load_nietzsche_dataset
4242
load_wmt_en_fr_dataset
43+
load_flickr25k_dataset
4344

4445
save_npz
4546
save_npz_dict
@@ -51,13 +52,20 @@ sake of cross-platform.
5152
save_any_to_npy
5253
load_npy_to_any
5354

54-
npz_to_W_pdf
55-
55+
file_exists
56+
folder_exists
57+
del_file
58+
del_folder
59+
read_file
5660
load_file_list
5761
load_folder_list
5862
exists_or_mkdir
5963
maybe_download_and_extract
6064

65+
natural_keys
66+
67+
npz_to_W_pdf
68+
6169
Load dataset functions
6270
------------------------
6371

@@ -85,11 +93,15 @@ Nietzsche
8593
^^^^^^^^^^^^^^
8694
.. autofunction:: load_nietzsche_dataset
8795

88-
8996
English-to-French translation data from the WMT'15 Website
9097
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
9198
.. autofunction:: load_wmt_en_fr_dataset
9299

100+
Flickr25k
101+
^^^^^^^^^^^^^^^^^^^^^^^^^
102+
.. autofunction:: load_flickr25k_dataset
103+
104+
93105

94106
Load and save network
95107
----------------------
@@ -130,13 +142,28 @@ Load variables from .npy
130142
.. autofunction:: load_npy_to_any
131143

132144

133-
Visualizing npz file
134-
----------------------
135-
.. autofunction:: npz_to_W_pdf
145+
Folder/File functions
146+
------------------------
136147

148+
Check file exists
149+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
150+
.. autofunction:: file_exists
137151

138-
Folder functions
139-
------------------
152+
Check folder exists
153+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
154+
.. autofunction:: folder_exists
155+
156+
Delete file
157+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
158+
.. autofunction:: del_file
159+
160+
Delete folder
161+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
162+
.. autofunction:: del_folder
163+
164+
Read file
165+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
166+
.. autofunction:: read_file
140167

141168
Load file list from folder
142169
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -153,3 +180,14 @@ Check and Create folder
153180
Download or extract
154181
^^^^^^^^^^^^^^^^^^^^^^^^^
155182
.. autofunction:: maybe_download_and_extract
183+
184+
Sort
185+
-------
186+
187+
Sort strings with numbers in human order
188+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
189+
.. autofunction:: natural_keys
190+
191+
Visualizing npz file
192+
----------------------
193+
.. autofunction:: npz_to_W_pdf

docs/modules/visualize.rst

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ to visualize the model, activations etc. Here we provide more functions for data
88

99
.. autosummary::
1010

11+
read_image
1112
save_image
1213
save_images
1314
W
@@ -17,8 +18,12 @@ to visualize the model, activations etc. Here we provide more functions for data
1718
tsne_embedding
1819

1920

20-
Save images
21-
--------------
21+
Save and read images
22+
----------------------
23+
24+
Read one image
25+
^^^^^^^^^^^^^^^^^
26+
.. autofunction:: read_image
2227

2328
Save one image
2429
^^^^^^^^^^^^^^^^^^^^^^^^^^

tensorlayer/files.py

Lines changed: 113 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,6 @@ def load_mnist_labels(path, filename):
8383
y_test = np.asarray(y_test, dtype=np.int32)
8484
return X_train, y_train, X_val, y_val, X_test, y_test
8585

86-
8786
def load_cifar10_dataset(shape=(-1, 32, 32, 3), path='data/cifar10/', plotable=False, second=3):
8887
"""The CIFAR-10 dataset consists of 60000 32x32 colour images in 10 classes, with
8988
6000 images per class. There are 50000 training images and 10000 test images.
@@ -218,7 +217,6 @@ def unpickle(file):
218217

219218
return X_train, y_train, X_test, y_test
220219

221-
222220
def load_ptb_dataset(path='data/ptb/'):
223221
"""Penn TreeBank (PTB) dataset is used in many LANGUAGE MODELING papers,
224222
including "Empirical Evaluation and Combination of Advanced Language
@@ -295,7 +293,6 @@ def load_ptb_dataset(path='data/ptb/'):
295293
# exit()
296294
return train_data, valid_data, test_data, vocabulary
297295

298-
299296
def load_matt_mahoney_text8_dataset(path='data/mm_test8/'):
300297
"""Download a text file from Matt Mahoney's website
301298
if not present, and make sure it's the right size.
@@ -331,7 +328,6 @@ def load_matt_mahoney_text8_dataset(path='data/mm_test8/'):
331328
word_list[idx] = word_list[idx].decode()
332329
return word_list
333330

334-
335331
def load_imdb_dataset(path='data/imdb/', nb_words=None, skip_top=0,
336332
maxlen=None, test_split=0.2, seed=113,
337333
start_char=1, oov_char=2, index_from=3):
@@ -506,6 +502,49 @@ def get_wmt_enfr_dev_set(path):
506502

507503
return train_path, dev_path
508504

505+
def load_flickr25k_dataset(tag='sky', path="data/flickr25k"):
    """Return the images of the Flickr25k dataset that carry a given tag.

    The archive is downloaded from `the official website <http://press.liacs.nl/mirflickr/mirdownload.html>`_
    and extracted into ``path`` the first time the function is used.

    Parameters
    ------------
    tag : string
        Tag to select, like 'dog' or 'red', see `Flickr Search <https://www.flickr.com/search/>`_.
    path : string
        Path to download data to, defaults to ``data/flickr25k``.

    Returns
    ---------
    images : list
        One entry per image whose tag file lists ``tag``, each as returned
        by ``visualize.read_image``.

    Examples
    -----------
    >>> images = tl.files.load_flickr25k_dataset(tag='sky')
    """
    filename = 'mirflickr25k.zip'
    url = 'http://press.liacs.nl/mirflickr/mirflickr25k/'
    # Every location is derived from ``path`` so that a non-default download
    # folder is also the folder the images and tags are read back from.
    folder_imgs = os.path.join(path, 'mirflickr')
    folder_tags = os.path.join(folder_imgs, 'meta', 'tags')

    ## download dataset
    if not folder_exists(folder_imgs):
        print("[*] Flickr25k is nonexistent in {}".format(path))
        maybe_download_and_extract(filename, path, url, extract=True)
        # the archive is no longer needed once it has been extracted
        del_file(os.path.join(path, filename))

    ## return images by the given tag.
    # 1. image path list
    path_imgs = load_file_list(path=folder_imgs, regx='\\.jpg', printable=False)
    path_imgs.sort(key=natural_keys)
    # 2. tag path list
    path_tags = load_file_list(path=folder_tags, regx='\\.txt', printable=False)
    path_tags.sort(key=natural_keys)
    # 3. select images
    # NOTE(review): image i and tag file i are paired purely by their position
    # in the two naturally sorted lists - confirm both folders stay aligned.
    images = []
    for idx, tag_file in enumerate(path_tags):
        tags = read_file(os.path.join(folder_tags, tag_file)).split('\n')
        if tag in tags:
            images.append(visualize.read_image(path_imgs[idx], folder_imgs))
    return images
509548

510549
## Load and save network
511550
def save_npz(save_list=[], name='model.npz', sess=None):
@@ -725,7 +764,7 @@ def load_and_assign_npz(sess=None, name=None, network=None):
725764
print("[*] Load {} SUCCESS!".format(name))
726765
return network
727766

728-
# Load and save variables
767+
## Load and save variables
729768
def save_any_to_npy(save_dict={}, name='file.npy'):
730769
"""Save variables to .npy file.
731770
@@ -758,30 +797,33 @@ def load_npy_to_any(path='', name='file.npy'):
758797
exit()
759798

760799

761-
# Visualizing npz files
762-
def npz_to_W_pdf(path=None, regx='w1pre_[0-9]+\.(npz)'):
763-
"""Convert the first weight matrix of .npz file to .pdf by using tl.visualize.W().
800+
## Folder and file functions
801+
def file_exists(filepath):
    """Check whether a file exists by given file path.

    Parameters
    ----------
    filepath : string
        Path to test.

    Returns
    -------
    bool
        Result of ``os.path.isfile(filepath)``.
    """
    is_regular_file = os.path.isfile(filepath)
    return is_regular_file
764804

765-
Parameters
766-
----------
767-
path : a string or None
768-
A folder path to npz files.
769-
regx : a string
770-
Regx for the file name.
805+
def folder_exists(folderpath):
    """Check whether a folder exists by given folder path.

    Parameters
    ----------
    folderpath : string
        Path to test.

    Returns
    -------
    bool
        Result of ``os.path.isdir(folderpath)``.
    """
    is_directory = os.path.isdir(folderpath)
    return is_directory
808+
809+
def del_file(filepath):
    """Delete a file by given file path.

    Parameters
    ----------
    filepath : string
        Path of the file handed to ``os.remove``.
    """
    target = filepath
    os.remove(target)
812+
813+
def del_folder(folderpath):
    """Delete a folder by given folder path.

    The removal is done with ``os.rmdir``, so the folder has to be empty.

    Parameters
    ----------
    folderpath : string
        Path of the folder handed to ``os.rmdir``.
    """
    target = folderpath
    os.rmdir(target)
816+
817+
def read_file(filepath):
    """Read a whole file in text mode and return its content as one string.

    Parameters
    ----------
    filepath : string
        Path of the file to read.

    Examples
    ---------
    >>> data = tl.files.read_file('data.txt')
    """
    with open(filepath, 'r') as handle:
        contents = handle.read()
    return contents
783826

784-
## Helper functions
785827
def load_file_list(path=None, regx='\.npz', printable=True):
786828
"""Return a file list in a folder by given a path and regular expression.
787829
@@ -854,7 +896,7 @@ def maybe_download_and_extract(filename, working_directory, url_source, extract=
854896
and optionally also tries to extract the file if format is ".zip" or ".tar"
855897
856898
Parameters
857-
----------
899+
-----------
858900
filename : string
859901
The name of the (to be) dowloaded file.
860902
working_directory : string
@@ -866,6 +908,7 @@ def maybe_download_and_extract(filename, working_directory, url_source, extract=
866908
expected_bytes : int/None
867909
If set tries to verify that the downloaded file is of the specified size, otherwise raises an Exception,
868910
defaults to None which corresponds to no check being performed
911+
869912
Returns
870913
----------
871914
filepath to dowloaded (uncompressed) file
@@ -917,3 +960,48 @@ def _dlProgress(count, blockSize, totalSize):
917960
else:
918961
print("Unknown compression_format only .tar.gz/.tar.bz2/.tar and .zip supported")
919962
return filepath
963+
964+
965+
## Sort
966+
def natural_keys(text):
    """Return a sort key that orders strings containing numbers in human order.

    The string is cut into alternating text and digit chunks, and every digit
    chunk is converted to ``int`` so that 'im5' sorts before 'im11'.

    Parameters
    ----------
    text : string
        The string to build a key for.

    Returns
    -------
    list
        Alternating ``str`` and ``int`` items, e.g. ``['im', 3, '.jpg']``.

    Examples
    ----------
    >>> l = ['im1.jpg', 'im31.jpg', 'im11.jpg', 'im21.jpg', 'im03.jpg', 'im05.jpg']
    >>> l.sort(key=tl.files.natural_keys)
    >>> l
    ... ['im1.jpg', 'im03.jpg', 'im05.jpg', 'im11.jpg', 'im21.jpg', 'im31.jpg']
    >>> l.sort() # that is what we don't want
    >>> l
    ... ['im03.jpg', 'im05.jpg', 'im1.jpg', 'im11.jpg', 'im21.jpg', 'im31.jpg']

    Reference
    ----------
    alist.sort(key=natural_keys) sorts in human order
    http://nedbatchelder.com/blog/200712/human_sorting.html
    (See Toothy's implementation in the comments)
    """
    # With a capturing group, re.split alternates non-digit text (even index)
    # and digit runs (odd index). Converting by position avoids str.isdigit(),
    # which also accepts characters such as superscripts that int() rejects.
    return [int(chunk) if idx % 2 else chunk
            for idx, chunk in enumerate(re.split(r'(\d+)', text))]
986+
987+
# Visualizing npz files
988+
def npz_to_W_pdf(path=None, regx=r'w1pre_[0-9]+\.(npz)'):
    r"""Convert the first weight matrix of .npz file to .pdf by using tl.visualize.W().

    Parameters
    ----------
    path : a string or None
        A folder path to npz files.
    regx : a string
        Regx for the file name.

    Examples
    --------
    >>> # Convert the first weight matrix of w1_pre...npz file to w1_pre...pdf.
    >>> tl.files.npz_to_W_pdf(path='/Users/.../npz_file/', regx='w1pre_[0-9]+\.(npz)')
    """
    file_list = load_file_list(path=path, regx=regx)
    for f in file_list:
        # only the first array stored in the file is visualized
        W = load_npz(path, f)[0]
        # output name is the file name up to its first dot
        stem = f.split('.')[0]
        print("%s --> %s" % (f, stem + '.pdf'))
        visualize.W(W, second=10, saveable=True, name=stem, fig_idx=2012)

tensorlayer/visualize.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,17 @@
1717
## Save images
1818
import scipy.misc
1919

20-
def save_image(image, image_path):
20+
def read_image(image, image_path=''):
    """Read one image.

    Parameters
    -----------
    image : string, file name.
    image_path : string, path of the folder that holds the file.

    Returns
    --------
    Whatever ``scipy.misc.imread`` returns for the joined path.
    """
    # NOTE(review): scipy.misc.imread is not available in recent SciPy
    # releases - confirm the SciPy version this module is expected to run on.
    full_path = os.path.join(image_path, image)
    return scipy.misc.imread(full_path)
29+
30+
def save_image(image, image_path=''):
2131
"""Save one image.
2232
2333
Parameters
@@ -31,7 +41,7 @@ def save_image(image, image_path):
3141
scipy.misc.imsave(image_path, image[:,:,0])
3242

3343

34-
def save_images(images, size, image_path):
44+
def save_images(images, size, image_path=''):
3545
"""Save mutiple images into one single image.
3646
3747
Parameters

0 commit comments

Comments
 (0)