@@ -83,7 +83,6 @@ def load_mnist_labels(path, filename):
8383 y_test = np .asarray (y_test , dtype = np .int32 )
8484 return X_train , y_train , X_val , y_val , X_test , y_test
8585
86-
8786def load_cifar10_dataset (shape = (- 1 , 32 , 32 , 3 ), path = 'data/cifar10/' , plotable = False , second = 3 ):
8887 """The CIFAR-10 dataset consists of 60000 32x32 colour images in 10 classes, with
8988 6000 images per class. There are 50000 training images and 10000 test images.
@@ -218,7 +217,6 @@ def unpickle(file):
218217
219218 return X_train , y_train , X_test , y_test
220219
221-
222220def load_ptb_dataset (path = 'data/ptb/' ):
223221 """Penn TreeBank (PTB) dataset is used in many LANGUAGE MODELING papers,
224222 including "Empirical Evaluation and Combination of Advanced Language
@@ -295,7 +293,6 @@ def load_ptb_dataset(path='data/ptb/'):
295293 # exit()
296294 return train_data , valid_data , test_data , vocabulary
297295
298-
299296def load_matt_mahoney_text8_dataset (path = 'data/mm_test8/' ):
300297 """Download a text file from Matt Mahoney's website
301298 if not present, and make sure it's the right size.
@@ -331,7 +328,6 @@ def load_matt_mahoney_text8_dataset(path='data/mm_test8/'):
331328 word_list [idx ] = word_list [idx ].decode ()
332329 return word_list
333330
334-
335331def load_imdb_dataset (path = 'data/imdb/' , nb_words = None , skip_top = 0 ,
336332 maxlen = None , test_split = 0.2 , seed = 113 ,
337333 start_char = 1 , oov_char = 2 , index_from = 3 ):
@@ -506,6 +502,49 @@ def get_wmt_enfr_dev_set(path):
506502
507503 return train_path , dev_path
508504
def load_flickr25k_dataset(tag='sky', path="data/flickr25k"):
    """Returns a list of images by a given tag from Flickr25k dataset,
    it will download Flickr25k from `the official website <http://press.liacs.nl/mirflickr/mirdownload.html>`_
    at the first time you use it.

    Parameters
    ------------
    tag : string like 'dog', 'red' see `Flickr Search <https://www.flickr.com/search/>`_.
    path : string
        Path to download data to, defaults to ``data/flickr25k``. The images
        are looked up in ``<path>/mirflickr`` and the tag files in
        ``<path>/mirflickr/meta/tags``.

    Returns
    ---------
    images : list
        One entry per image whose tag file contains ``tag``, each entry being
        whatever ``visualize.read_image`` returns for that image.

    Examples
    -----------
    >>> images = tl.files.load_flickr25k_dataset(tag='sky')
    """
    filename = 'mirflickr25k.zip'
    url = 'http://press.liacs.nl/mirflickr/mirflickr25k/'
    # Every location is derived from ``path`` so that a non-default download
    # folder is honoured when reading the images back.
    folder_imgs = path + "/mirflickr"
    folder_tags = folder_imgs + "/meta/tags"
    ## download dataset
    if not folder_exists(folder_imgs):
        print("[*] Flickr25k is nonexistent in {}".format(path))
        maybe_download_and_extract(filename, path, url, extract=True)
        # the archive is no longer needed once it has been extracted
        del_file(path + '/' + filename)
    ## return images by the given tag.
    # 1. image path list
    path_imgs = load_file_list(path=folder_imgs, regx=r'\.jpg', printable=False)
    path_imgs.sort(key=natural_keys)
    # 2. tag path list
    path_tags = load_file_list(path=folder_tags, regx=r'\.txt', printable=False)
    path_tags.sort(key=natural_keys)
    # 3. select images
    # Both lists are sorted in natural order, so the idx-th tag file is taken
    # to describe the idx-th image.
    images = []
    for idx, tag_file in enumerate(path_tags):
        tags = read_file(folder_tags + '/' + tag_file).split('\n')
        if tag in tags:
            images.append(visualize.read_image(path_imgs[idx], folder_imgs))
    return images
509548
510549## Load and save network
511550def save_npz (save_list = [], name = 'model.npz' , sess = None ):
@@ -725,7 +764,7 @@ def load_and_assign_npz(sess=None, name=None, network=None):
725764 print ("[*] Load {} SUCCESS!" .format (name ))
726765 return network
727766
728- # Load and save variables
767+ ## Load and save variables
729768def save_any_to_npy (save_dict = {}, name = 'file.npy' ):
730769 """Save variables to .npy file.
731770
@@ -758,30 +797,33 @@ def load_npy_to_any(path='', name='file.npy'):
758797 exit ()
759798
760799
761- # Visualizing npz files
762- def npz_to_W_pdf (path = None , regx = 'w1pre_[0-9]+\.(npz)' ):
763- """Convert the first weight matrix of .npz file to .pdf by using tl.visualize.W().
800+ ## Folder functions
def file_exists(filepath):
    """Return True when ``filepath`` refers to an existing regular file.

    Parameters
    ----------
    filepath : string
        Path of the file to look for.
    """
    is_regular_file = os.path.isfile(filepath)
    return is_regular_file
764804
765- Parameters
766- ----------
767- path : a string or None
768- A folder path to npz files.
769- regx : a string
770- Regx for the file name.
def folder_exists(folderpath):
    """Return True when ``folderpath`` refers to an existing directory.

    Parameters
    ----------
    folderpath : string
        Path of the folder to look for.
    """
    is_directory = os.path.isdir(folderpath)
    return is_directory
808+
def del_file(filepath):
    """Remove the file located at ``filepath``.

    Parameters
    ----------
    filepath : string
        Path of the file to delete.
    """
    # os.unlink is the same operation as os.remove under its other name
    os.unlink(filepath)
812+
def del_folder(folderpath):
    """Remove the folder located at ``folderpath``.

    The removal goes through ``os.rmdir``, so the folder has to be empty.

    Parameters
    ----------
    folderpath : string
        Path of the folder to delete.
    """
    target = folderpath
    os.rmdir(target)
816+
def read_file(filepath):
    """Open a file in text mode and return its whole content as one string.

    Parameters
    ----------
    filepath : string
        Path of the file to read.

    Examples
    ---------
    >>> data = tl.files.read_file('data.txt')
    """
    with open(filepath, 'r') as handle:
        content = handle.read()
    return content
783826
784- ## Helper functions
785827def load_file_list (path = None , regx = '\.npz' , printable = True ):
786828 """Return a file list in a folder by given a path and regular expression.
787829
@@ -854,7 +896,7 @@ def maybe_download_and_extract(filename, working_directory, url_source, extract=
854896 and optionally also tries to extract the file if format is ".zip" or ".tar"
855897
856898 Parameters
857- ----------
899+ -----------
858900 filename : string
859901 The name of the (to be) dowloaded file.
860902 working_directory : string
@@ -866,6 +908,7 @@ def maybe_download_and_extract(filename, working_directory, url_source, extract=
866908 expected_bytes : int/None
867909 If set tries to verify that the downloaded file is of the specified size, otherwise raises an Exception,
868910 defaults to None which corresponds to no check being performed
911+
869912 Returns
870913 ----------
871914 filepath to dowloaded (uncompressed) file
@@ -917,3 +960,48 @@ def _dlProgress(count, blockSize, totalSize):
917960 else :
918961 print ("Unknown compression_format only .tar.gz/.tar.bz2/.tar and .zip supported" )
919962 return filepath
963+
964+
965+ ## Sort
def natural_keys(text):
    """Sort list of string with number in human order.

    Splits ``text`` into alternating non-digit and digit chunks and converts
    the digit chunks to ``int``, so that the result can be used as a sort key.

    Parameters
    ----------
    text : string
        The string to build a sort key for.

    Returns
    ----------
    A list whose even positions hold the non-digit chunks (strings, possibly
    empty) and whose odd positions hold the digit runs converted to ``int``.

    Examples
    ----------
    >>> l = ['im1.jpg', 'im31.jpg', 'im11.jpg', 'im21.jpg', 'im03.jpg', 'im05.jpg']
    >>> l.sort(key=tl.files.natural_keys)
    ... ['im1.jpg', 'im03.jpg', 'im05.jpg', 'im11.jpg', 'im21.jpg', 'im31.jpg']
    >>> l.sort() # that is what we dont want
    ... ['im03.jpg', 'im05.jpg', 'im1.jpg', 'im11.jpg', 'im21.jpg', 'im31.jpg']

    Reference
    ----------
    alist.sort(key=natural_keys) sorts in human order
    http://nedbatchelder.com/blog/200712/human_sorting.html
    (See Toothy's implementation in the comments)
    """
    # Splitting on a capturing group puts every digit run at an odd index.
    # Converting by position, rather than by ``str.isdigit()``, avoids calling
    # ``int()`` on characters such as superscript two, for which ``isdigit()``
    # is true although ``\d`` does not match them and ``int()`` rejects them.
    chunks = re.split(r'(\d+)', text)
    return [int(chunk) if idx % 2 else chunk for idx, chunk in enumerate(chunks)]
986+
987+ # Visualizing npz files
def npz_to_W_pdf(path=None, regx=r'w1pre_[0-9]+\.(npz)'):
    r"""Convert the first weight matrix of .npz file to .pdf by using tl.visualize.W().

    Parameters
    ----------
    path : a string or None
        A folder path to npz files.
    regx : a string
        Regx for the file name.

    Examples
    --------
    Convert the first weight matrix of w1_pre...npz file to w1_pre...pdf.

    >>> tl.files.npz_to_W_pdf(path='/Users/.../npz_file/', regx=r'w1pre_[0-9]+\.(npz)')
    """
    file_list = load_file_list(path=path, regx=regx)
    for f in file_list:
        # only the first array stored in the npz file is visualized
        W = load_npz(path, f)[0]
        # the text before the first dot of the file name names the output
        stem = f.split('.')[0]
        print("%s --> %s" % (f, stem + '.pdf'))
        visualize.W(W, second=10, saveable=True, name=stem, fig_idx=2012)
0 commit comments