Skip to content

Commit 9c3cb54

Browse files
crainbows authored and hardikvasa committed
Skip images with certain urls (#213)
* Add flag to ignore certain urls
* Updated README and parser arguments
* Fix error on missing -iu option

Resolves issues related to pull request #213. Removed duplicated validation.
1 parent 7679d9b commit 9c3cb54

File tree

1 file changed

+9
-5
lines changed

1 file changed

+9
-5
lines changed

google_images_download/google_images_download.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@
4040
"output_directory", "image_directory", "no_directory", "proxy", "similar_images", "specific_site",
4141
"print_urls", "print_size", "print_paths", "metadata", "extract_metadata", "socket_timeout",
4242
"thumbnail", "thumbnail_only", "language", "prefix", "chromedriver", "related_images", "safe_search", "no_numbering",
43-
"offset", "no_download","save_source","silent_mode"]
43+
"offset", "no_download","save_source","silent_mode","ignore_urls"]
4444

4545

4646
def user_input():
@@ -112,6 +112,7 @@ def user_input():
112112
parser.add_argument('-nn', '--no_numbering', default=False, help="Allows you to exclude the default numbering of images", action="store_true")
113113
parser.add_argument('-of', '--offset', help="Where to start in the fetched links", type=str, required=False)
114114
parser.add_argument('-nd', '--no_download', default=False, help="Prints the URLs of the images and/or thumbnails without downloading them", action="store_true")
115+
parser.add_argument('-iu', '--ignore_urls', default=False, help="delimited list input of image urls/keywords to ignore", type=str)
115116
parser.add_argument('-sil', '--silent_mode', default=False, help="Remains silent. Does not print notification messages on the terminal", action="store_true")
116117
parser.add_argument('-is', '--save_source', help="creates a text file containing a list of downloaded images along with source page url", type=str, required=False)
117118

@@ -507,7 +508,7 @@ def create_directories(self,main_directory, dir_name,thumbnail,thumbnail_only):
507508

508509

509510
# Download Images
510-
def download_image_thumbnail(self,image_url,main_directory,dir_name,return_image_name,print_urls,socket_timeout,print_size,no_download,save_source,img_src):
511+
def download_image_thumbnail(self,image_url,main_directory,dir_name,return_image_name,print_urls,socket_timeout,print_size,no_download,save_source,img_src,ignore_urls):
511512
if print_urls or no_download:
512513
print("Image URL: " + image_url)
513514
if no_download:
@@ -574,7 +575,10 @@ def download_image_thumbnail(self,image_url,main_directory,dir_name,return_image
574575

575576

576577
# Download Images
577-
def download_image(self,image_url,image_format,main_directory,dir_name,count,print_urls,socket_timeout,prefix,print_size,no_numbering,no_download,save_source,img_src,silent_mode,thumbnail_only,format):
578+
def download_image(self,image_url,image_format,main_directory,dir_name,count,print_urls,socket_timeout,prefix,print_size,no_numbering,no_download,save_source,img_src,silent_mode,thumbnail_only,format,ignore_urls):
579+
if ignore_urls:
580+
if any(url in image_url for url in ignore_urls.split(',')):
581+
return "fail","Image ignored",None,None
578582
if thumbnail_only:
579583
return "success", "Skipping image download...", str(image_url[(image_url.rfind('/')) + 1:]), image_url
580584
if not silent_mode:
@@ -759,14 +763,14 @@ def _get_all_items(self,page,main_directory,dir_name,limit,arguments):
759763
print("\nImage Metadata: " + str(object))
760764

761765
#download the images
762-
download_status,download_message,return_image_name,absolute_path = self.download_image(object['image_link'],object['image_format'],main_directory,dir_name,count,arguments['print_urls'],arguments['socket_timeout'],arguments['prefix'],arguments['print_size'],arguments['no_numbering'],arguments['no_download'],arguments['save_source'],object['image_source'],arguments["silent_mode"],arguments["thumbnail_only"],arguments['format'])
766+
download_status,download_message,return_image_name,absolute_path = self.download_image(object['image_link'],object['image_format'],main_directory,dir_name,count,arguments['print_urls'],arguments['socket_timeout'],arguments['prefix'],arguments['print_size'],arguments['no_numbering'],arguments['no_download'],arguments['save_source'],object['image_source'],arguments["silent_mode"],arguments["thumbnail_only"],arguments['format'],arguments['ignore_urls'])
763767
if not arguments["silent_mode"]:
764768
print(download_message)
765769
if download_status == "success":
766770

767771
# download image_thumbnails
768772
if arguments['thumbnail'] or arguments["thumbnail_only"]:
769-
download_status, download_message_thumbnail = self.download_image_thumbnail(object['image_thumbnail_url'],main_directory,dir_name,return_image_name,arguments['print_urls'],arguments['socket_timeout'],arguments['print_size'],arguments['no_download'],arguments['save_source'],object['image_source'])
773+
download_status, download_message_thumbnail = self.download_image_thumbnail(object['image_thumbnail_url'],main_directory,dir_name,return_image_name,arguments['print_urls'],arguments['socket_timeout'],arguments['print_size'],arguments['no_download'],arguments['save_source'],object['image_source'],arguments['ignore_urls'])
770774
if not arguments["silent_mode"]:
771775
print(download_message_thumbnail)
772776

0 commit comments

Comments
 (0)