@@ -513,15 +513,15 @@ def load_flickr25k_dataset(tag='sky', path="data/flickr25k", n_threads=50, print
513513 If you want to get images with tag, use string like 'dog', 'red', see `Flickr Search <https://www.flickr.com/search/>`_.
514514 If you want to get all images, set to ``None``.
515515 path : string
516- The path that the data is downloaded to, defaults is ``data/flickr25k/``
516+ The path that the data is downloaded to, defaults is ``data/flickr25k/``.
517517 n_threads : int, number of thread to read image.
518518 printable : bool, print infomation when reading images, default is False.
519519
520520 Examples
521521 -----------
522522 - Get images with tag of sky
523523 >>> images = tl.files.load_flickr25k_dataset(tag='sky')
524-
524+
525525 - Get all images
526526 >>> images = tl.files.load_flickr25k_dataset(tag=None, n_threads=100, printable=True)
527527 """
@@ -534,12 +534,12 @@ def load_flickr25k_dataset(tag='sky', path="data/flickr25k", n_threads=50, print
534534 del_file (path + '/' + filename )
535535 ## return images by the given tag.
536536 # 1. image path list
537- folder_imgs = "data/flickr25k /mirflickr"
537+ folder_imgs = path + " /mirflickr"
538538 path_imgs = load_file_list (path = folder_imgs , regx = '\\ .jpg' , printable = False )
539539 path_imgs .sort (key = natural_keys )
540540 # print(path_imgs[0:10])
541541 # 2. tag path list
542- folder_tags = "data/flickr25k /mirflickr/meta/tags"
542+ folder_tags = path + " /mirflickr/meta/tags"
543543 path_tags = load_file_list (path = folder_tags , regx = '\\ .txt' , printable = False )
544544 path_tags .sort (key = natural_keys )
545545 # print(path_tags[0:10])
@@ -559,6 +559,91 @@ def load_flickr25k_dataset(tag='sky', path="data/flickr25k", n_threads=50, print
559559 return images
560560
561561
562+
def load_flickr1M_dataset(tag='sky', size=10, path="data/flickr1M", n_threads=50, printable=False):
    """Returns a list of images by a given tag from Flickr1M dataset,
    it will download Flickr1M from `the official website <http://press.liacs.nl/mirflickr/mirdownload.html>`_
    at the first time you use it.

    Parameters
    ------------
    tag : string or None
        If you want to get images with tag, use string like 'dog', 'red', see `Flickr Search <https://www.flickr.com/search/>`_.
        If you want to get all images, set to ``None``.
    size : int 1 to 10.
        1 means 100k images ... 5 means 500k images, 10 means all 1 million images. Default is 10.
    path : string
        The path that the data is downloaded to, default is ``data/flickr1M/``.
    n_threads : int, number of threads used to read images.
    printable : bool, print information when reading images, default is False.

    Returns
    --------
    The result of ``visualize.read_images`` applied to the selected image paths.

    Examples
    -----------
    - Use 200k images and get the ones tagged 'sky'
    >>> images = tl.files.load_flickr1M_dataset(tag='sky', size=2)

    - Get all images of the first 100k
    >>> images = tl.files.load_flickr1M_dataset(tag=None, size=1)
    """
    # Local import keeps this block self-contained; shutil replaces the
    # previous shell-out to ``mv``.
    import shutil

    print("[Flickr1M] using {}% of images = {}".format(size * 10, size * 100000))
    images_zip = ['images%d.zip' % i for i in range(10)]
    tag_zip = 'tags.zip'
    url = 'http://press.liacs.nl/mirflickr/mirflickr1m/'

    ## download dataset
    for image_zip in images_zip[0:size]:
        image_folder = image_zip.split(".")[0]
        if folder_exists(path + "/" + image_folder) is False:
            print("[Flickr1M] {} is missing in {}".format(image_folder, path))
            maybe_download_and_extract(image_zip, path, url, extract=True)
            del_file(path + '/' + image_zip)
            # The extracted folder is expected at ``<path>/images``; rename it
            # to ``<path>/imagesN`` so the next archive does not collide with it.
            # shutil.move avoids building a shell command from ``path``.
            shutil.move(path + '/images', path + '/' + image_folder)
        else:
            print("[Flickr1M] {} exists in {}".format(image_folder, path))

    ## download tag
    if folder_exists(path + "/tags") is False:
        print("[Flickr1M] tag files is nonexistent in {}".format(path))
        maybe_download_and_extract(tag_zip, path, url, extract=True)
        del_file(path + '/' + tag_zip)
    else:
        print("[Flickr1M] tags exists in {}".format(path))

    ## 1. image path list
    images_list = []
    images_folder_list = []
    for i in range(0, size):
        images_folder_list += load_folder_list(path=path + '/images%d' % i)
    # Sub-folders are named by integer; sort numerically, not lexically.
    images_folder_list.sort(key=lambda s: int(s.split('/')[-1]))  # folder/images/ddd
    for folder in images_folder_list[0:size * 10]:
        tmp = load_file_list(path=folder, regx='\\.jpg', printable=False)
        tmp.sort(key=lambda s: int(s.split('.')[-2]))  # ddd.jpg
        images_list.extend([folder + '/' + x for x in tmp])

    ## 2. tag path list
    tag_list = []
    tag_folder_list = load_folder_list(path + "/tags")
    tag_folder_list.sort(key=lambda s: int(s.split('/')[-1]))  # folder/tags/ddd
    for folder in tag_folder_list[0:size * 10]:
        tmp = load_file_list(path=folder, regx='\\.txt', printable=False)
        tmp.sort(key=lambda s: int(s.split('.')[-2]))  # ddd.txt
        tag_list += [folder + '/' + s for s in tmp]

    ## 3. select images
    print("[Flickr1M] searching tag: {}".format(tag))
    if tag is None:
        # Documented contract: ``tag=None`` returns every image, so there is
        # no need to open the tag files at all.
        select_images_list = list(images_list)
    else:
        select_images_list = []
        # images_list and tag_list are both sorted numerically, so the same
        # index is used to pair a tag file with its image.
        for idx, tag_file in enumerate(tag_list):
            tags = read_file(tag_file).split('\n')
            if tag in tags:
                select_images_list.append(images_list[idx])

    print("[Flickr1M] reading images with tag: {}".format(tag))
    images = visualize.read_images(select_images_list, '', n_threads=n_threads, printable=printable)
    return images
644+
645+
646+
562647## Load and save network
563648def save_npz (save_list = [], name = 'model.npz' , sess = None ):
564649 """Input parameters and the file name, save parameters into .npz file. Use tl.utils.load_npz() to restore.
0 commit comments