From 0c5e7c1a5b6e7341e364faf1f9e03c753f15b4cf Mon Sep 17 00:00:00 2001 From: Hardik Vasa Date: Mon, 26 Feb 2018 20:36:46 -0800 Subject: [PATCH 01/83] Feature/update pypi (#39) (#40) * chg: doc: license on setup.py * chg: doc: rename README.md to README.rst * chg: doc: syntax from markdown to rst * chg: doc: command on readme * chg: doc: re add table * chg: doc: remove duplicate example * chg: doc: remove readme convertion rst is already default readme filetype * chg: doc: update table to latest upstream commit * chg: doc: update classifiers to latest upstream commit * chg: dev: update print_urls description --- README.md | 140 ----------------------------------- README.rst | 212 +++++++++++++++++++++++++++++++++++++++++++++++++++++ setup.py | 34 +++------ 3 files changed, 222 insertions(+), 164 deletions(-) delete mode 100644 README.md create mode 100644 README.rst diff --git a/README.md b/README.md deleted file mode 100644 index 92ec9c52..00000000 --- a/README.md +++ /dev/null @@ -1,140 +0,0 @@ -# Google Images Download -Python Script for 'searching' and 'downloading' hundreds of Google images to the local hard disk! - -## Summary -This is a command line python program to search keywords/key-phrases on Google Images and then also optionally download one or more images to your computer. This is a small program which is ready-to-run, but still under development. Many more features will be added to it going forward. - -## Compatability -This program is compatible with both the versions of python (2.x and 3.x). It is a download-and-run program with no changes to the file. You will just have to specify parameters through the command line. -___ - -## Installation -**Using pip:** -``` -$ pip install google_images_download -``` - -**Manually:** -``` -$ git clone https://github.com/hardikvasa/google-images-download.git -$ cd google-images-download && sudo python setup.py install -``` - -## Usage -Go to the directory where you have the `google_images_download.py` file and type in the command: - -**Python3:** python3 google_images_download.py [Arguments...] - -**OR** - -**Python2:** python google_images_download.py [Arguments...] - - -### Arguments - -| Argument | Short hand | Description | -| --- | :---: | --- | -|**keywords**| k | Denotes the keywords/key phrases you want to search for and the directory file name.
Tips:
* If you simply type the keyword, Google will try its best to match it
* If you want to search for an exact phrase, you can wrap the keywords in double quotes ("")
* If you want the search to contain either of the words provided, use **OR** between the words.
* If you want to explicitly exclude a specific word, use a minus sign before the word (-)|
-|**suffix_keywords**| sk | Denotes additional words added after the main keyword while making the search query. Useful when you have multiple suffix keywords for one keyword
The final search query would be: | -|**limit** | l |Denotes number of images that you want to download. | -|**format** | f |Denotes the format/extension that you want to download.
`Possible values: jpg, gif, png, bmp, svg, webp, ico`| -|**color** | c |Denotes the color filter that you want to apply to the images.
`Possible values: red, orange, yellow, green, teal, blue, purple, pink, white, gray, black, brown`| -|**color_type** | ct |Denotes the color type you want to apply to the images.
`Possible values: full-color, black-and-white, transparent`| -|**usage_rights** | r |Denotes the usage rights/licence under which the image is classified.
`Possible values: labled-for-reuse-with-modifications, labled-for-reuse, labled-for-noncommercial-reuse-with-modification, labled-for-nocommercial-reuse`| -|**size** | s |Denotes the relative size of the image to be downloaded.
`Possible values: large, medium, icon`|
-|**aspect_ratio** | a |Denotes the aspect ratio of images to download.
`Possible values: tall, square, wide, panoramic`| -|**type** | t |Denotes the type of image to be downloaded.
`Possible values: face, photo, clip-art, line-drawing, animated`|
-|**time** | w |Denotes the time the image was uploaded/indexed.
`Possible values: past-24-hours, past-7-days`| -|**delay** | d |Time to wait between downloading two images| -|**url** | u |Allows you search by image. It downloads images from the google images link provided| -|**single_image** | x |Allows you to download one image if the complete URL of the image is provided| -|**output_directory** | o |Allows you specify the main directory name. If not specified, it will default to 'downloads'| -|**similar_images** | si |Reverse Image Search. Searches and downloads images that are similar to the image link/url you provide.| -|**specific_site** | ss |Allows you to download images with keywords only from a specific website/domain name you mention as indexed in Google Images.| -|**print_urls** | p |Print the URLs of the images on the console. These image URLs can be used for debugging purposes| -|**help** | h |show the help message regarding the usage of the above arguments| - -**Note:** If `single_image` or `url` parameter is not present, then keywords is a mandatory parameter. No other parameters are mandatory. - -## Examples -* If you have python 2.x version installed - -`python google-images-download.py --keywords "Polar bears, baloons, Beaches" --limit 20` - -* If you have python 3.x version installed - -`python3 google-images-download.py --keywords "Polar bears, baloons, Beaches" --limit 20` - -* Using Suffix Keywords allows you to specify words after the main keywords. For example if the `keyword = car` and `suffix keyword = 'red,blue'` then it will first search for `car red` and then `car blue` - -`python3 google-images-download.py --k "car" -sk 'red,blue,white' -l 10` - -* To use the short hand command - -`python google-images-download.py -k "Polar bears, baloons, Beaches" -l 20` - -* To download images with specific image extension/format - -`python google-images-download.py --keywords "logo" --format svg` - -* To use color filters for the images - -`python google-images-download.py -k "playground" -l 20 -c red` - -* To use non-English keywords for image search - -`python google-images-download.py -k "北极熊" -l 5` - -* To download images from the google images link - -`python google-images-download.py -k "sample" -u ` - -* To save images in specific main directory (instead of in 'downloads') - -`python google-images-download.py -k "boat" -o "boat_new"` - -* To download one single image with the image URL - -`python google-images-download.py --keywords "baloons" --single_image ` - -* To download images with size and type constrains - -`python google-images-download.py --keywords "baloons" --size medium --type animated` - -* To download images with specific usage rights - -`python google-images-download.py --keywords "universe" --usage_rights labled-for-reuse` - -* To download images with specific color type - -`python google-images-download.py --keywords "flowers" --color_type black-and-white` - -* To download images with specific aspect ratio - -`python google-images-download.py --keywords "universe" --aspect_ratio panoramic` - -* To download images which are similar to the image in the image URL that you provided (Reverse Image search). - -`python3 pr.py -si -l 10` - -* To download images from specific website or domain name for a given keyword - -`python google-images-download.py --keywords "universe" --specific_site example.com` - -===> The images would be downloaded in their own sub-directories inside the main directory (either the one you provided or in 'downloads') in the same folder as the `google_images_download.py` file. 
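To make the Suffix Keywords expansion above concrete, here is a small standalone sketch (illustrative only, not part of the script itself) of how one keyword and a comma-separated suffix list combine into individual search queries:

```python
# Illustrative sketch of the documented keywords / suffix_keywords expansion.
keyword = "car"                  # example value passed to --keywords
suffix_keywords = "red,blue"     # example value passed to --suffix_keywords

search_queries = [keyword + " " + suffix.strip()
                  for suffix in suffix_keywords.split(",")]

print(search_queries)  # ['car red', 'car blue']
```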
- - -___ - -## SSL Errors -If you do see SSL errors on Mac for Python 3 please go to Finder —> Applications —> Python 3 —> Click on the ‘Install Certificates.command’ and run the file. - -## Contribute -Anyone is welcomed to contribute to this script. If you would like to make a change, open a pull request. For issues and discussion visit the [Issue Tracker](https://github.com/hardikvasa/google-images-download/issues). - -The aim of this repo is to keep it simple, stand-alone, backward compatible and 3rd party dependency proof. - -## Disclaimer -This program lets you download tons of images from Google. Please do not download any image without violating its copyright terms. Google Images is a search engine that merely indexes images and allows you to find them. It does NOT produce its own images and, as such, it doesn't own copyright on any of them. The original creators of the images own the copyrights. - -Images published in the United States are automatically copyrighted by their owners, even if they do not explicitly carry a copyright warning. You may not reproduce copyright images without their owner's permission, except in "fair use" cases, or you could risk running into lawyer's warnings, cease-and-desist letters, and copyright suits. Please be very careful before its usage! diff --git a/README.rst b/README.rst new file mode 100644 index 00000000..fb6aff11 --- /dev/null +++ b/README.rst @@ -0,0 +1,212 @@ +Google Images Download +====================== + +Python Script for 'searching' and 'downloading' hundreds of Google images to the local hard disk! + +Summary +------- + +This is a command line python program to search keywords/key-phrases on Google Images +and then also optionally download one or more images to your computer. +This is a small program which is ready-to-run, but still under development. +Many more features will be added to it going forward. + +Compatability +------------- + +This program is compatible with both the versions of python (2.x and 3.x). +It is a download-and-run program with no changes to the file. +You will just have to specify parameters through the command line. + +Installation +------------ + +**Using pip:** + +:: + + $ pip install google_images_download + +**Manually:** + +:: + + $ git clone https://github.com/hardikvasa/google-images-download.git + $ cd google-images-download && sudo python setup.py install + +Usage +----- + +:: + + $ googleimagesdownload [Arguments...] + +Arguments +~~~~~~~~~ + ++------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| Argument | Short hand | Description | ++==================+=============+===============================================================================================================================+ +| keywords | k | Denotes the keywords/key phrases you want to search for and the directory file name. | +| | | Tips: | +| | | * If you simply type the keyword, Google will best try to match it | +| | | * If you want to search for exact phrase, you can wrap the keywords in double quotes ("") | +| | | * If you want to search to contain either of the words provided, use **OR** between the words. 
| +| | | * If you want to explicitly not want a specific word use a minus sign before the word (-) | ++------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| suffix_keywords | sk | Denotes additional words added after main keyword while making the search query. | +| | | Useful when you have multiple suffix keywords for one keyword. | +| | | The final search query would be: | ++------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| limit | l | Denotes number of images that you want to download. | ++------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| format | f | Denotes the format/extension that you want to download. | +| | | `Possible values: jpg, gif, png, bmp, svg, webp, ico` | ++------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| color | c | Denotes the color filter that you want to apply to the images. | +| | | `Possible values: | +| | | red, orange, yellow, green, teal, blue, purple, pink, white, gray, black, brown` | ++------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| color_type | ct | Denotes the color type you want to apply to the images. | +| | | `Possible values: full-color, black-and-white, transparent` | ++------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| usage_rights | r | Denotes the usage rights/licence under which the image is classified. | +| | | `Possible values: | +| | | * labled-for-reuse-with-modifications, | +| | | * labled-for-reuse, | +| | | * labled-for-noncommercial-reuse-with-modification, | +| | | * labled-for-nocommercial-reuse` | ++------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| size | s | Denotes the relative size of the image to be downloaded. | +| | | `Possible values: large, medium, icon` | ++------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| aspect_ratio | a | Denotes the aspect ration of images to download. | +| | | `Possible values: tall, square, wide, panoramic` | ++------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| type | t | Denotes the type of image to be downloaded. | +| | | `Possible values: face,photo,clip-art,line-drawing,animated` | ++------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| time | w | Denotes the time the image was uploaded/indexed. 
| +| | | `Possible values: past-24-hours, past-7-days` | ++------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| delay | d | Time to wait between downloading two images | ++------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| url | u | Allows you search by image. It downloads images from the google images link provided | ++------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| single_image | x | Allows you to download one image if the complete URL of the image is provided | ++------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| output_directory | o | Allows you specify the main directory name. If not specified, it will default to 'downloads' | ++------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| similar_images | si | Reverse Image Search. | +| | | Searches and downloads images that are similar to the image link/url you provide. | ++------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| specific_site | ss | Allows you to download images with keywords only from a specific website/domain name you mention as indexed in Google Images. | ++------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| print_urls | p | Print the URLs of the imageson the console. These image URLs can be used for debugging purposes | ++------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| help | h | show the help message regarding the usage of the above arguments | ++------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ + +**Note:** If ``single_image`` or ``url`` parameter is not present, then keywords is a mandatory parameter. No other parameters are mandatory. + +Examples +-------- + +- Simple examples + +``googleimagesdownload --keywords "Polar bears, baloons, Beaches" --limit 20`` + +- Using Suffix Keywords allows you to specify words after the main + keywords. 
For example if the ``keyword = car`` and + ``suffix keyword = 'red,blue'`` then it will first search for + ``car red`` and then ``car blue`` + +``googleimagesdownload --k "car" -sk 'red,blue,white' -l 10`` + +- To use the short hand command + +``googleimagesdownload -k "Polar bears, baloons, Beaches" -l 20`` + +- To download images with specific image extension/format + +``googleimagesdownload --keywords "logo" --format svg`` + +- To use color filters for the images + +``googleimagesdownload -k "playground" -l 20 -c red`` + +- To use non-English keywords for image search + +``googleimagesdownload -k "北极熊" -l 5`` + +- To download images from the google images link + +``googleimagesdownload -k "sample" -u `` + +- To save images in specific main directory (instead of in 'downloads') + +``googleimagesdownload -k "boat" -o "boat_new"`` + +- To download one single image with the image URL + +``googleimagesdownload --keywords "baloons" --single_image `` + +- To download images with size and type constrains + +``googleimagesdownload --keywords "baloons" --size medium --type animated`` + +- To download images with specific usage rights + +``googleimagesdownload --keywords "universe" --usage_rights labled-for-reuse`` + +- To download images with specific color type + +``googleimagesdownload --keywords "flowers" --color_type black-and-white`` + +- To download images with specific aspect ratio + +``googleimagesdownload --keywords "universe" --aspect_ratio panoramic`` + +- To download images which are similar to the image in the image URL that you provided (Reverse Image search). + +``python3 pr.py -si -l 10`` + +- To download images from specific website or domain name for a given keyword + +``googleimagesdownload --keywords "universe" --specific_site example.com`` + +===> The images would be downloaded in their own sub-directories inside the main directory +(either the one you provided or in 'downloads') in the same folder you are in. + +-------------- + +SSL Errors +---------- + +If you do see SSL errors on Mac for Python 3, +please go to Finder —> Applications —> Python 3 —> Click on the ‘Install Certificates.command’ +and run the file. + +Contribute +---------- + +Anyone is welcomed to contribute to this script. +If you would like to make a change, open a pull request. +For issues and discussion visit the +`Issue Tracker `__ + +Disclaimer +---------- + +This program lets you download tons of images from Google. +Please do not download any image without violating its copyright terms. +Google Images is a search engine that merely indexes images and allows you to find them. +It does NOT produce its own images and, as such, it doesn't own copyright on any of them. +The original creators of the images own the copyrights. + +Images published in the United States are automatically copyrighted by their owners, +even if they do not explicitly carry a copyright warning. +You may not reproduce copyright images without their owner's permission, +except in "fair use" cases, +or you could risk running into lawyer's warnings, cease-and-desist letters, and copyright suits. +Please be very careful before its usage! 
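For scripted or repeated downloads, the command line described above can also be driven from a short Python wrapper.
A minimal sketch, assuming the ``googleimagesdownload`` command is installed and on your PATH (the keywords and limit below are only placeholders):

::

    import subprocess

    # One download run per keyword; the values here are examples only.
    for keyword in ["Polar bears", "Beaches"]:
        subprocess.check_call([
            "googleimagesdownload",
            "--keywords", keyword,
            "--limit", "5",
        ])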
diff --git a/setup.py b/setup.py index 449f920a..141e003c 100644 --- a/setup.py +++ b/setup.py @@ -7,23 +7,8 @@ here = path.abspath(path.dirname(__file__)) # Get the long description from the README file -try: - import pypandoc - if path.isfile('README.rst'): - print("README.rst already exist.") - print("NOT REFRESHING README.rst") - else: - long_description = pypandoc.convert_file('README.md', 'rst') - with open("README.rst", "w") as f: - f.write(long_description) - - with open('README.rst', encoding='utf-8') as f: - long_description = f.read() -except Exception as e: - print("Error:{}:{}".format(type(e), e)) - print("NOT REFRESHING README.rst") - with open('README.md', encoding='utf-8') as f: - long_description = f.read() +with open('README.rst', encoding='utf-8') as f: + long_description = f.read() # get the dependencies and installs with open(path.join(here, 'requirements.txt'), encoding='utf-8') as f: @@ -41,13 +26,14 @@ download_url='https://github.com/hardikvasa/google-images-download/tarball/' + __version__, license='MIT', classifiers=[ - 'Development Status :: 3 - Alpha', - 'Intended Audience :: Developers', - 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3.3', - 'Programming Language :: Python :: 3.4', - 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.6' + 'Development Status :: 3 - Alpha', + 'Intended Audience :: Developers', + 'License :: OSI Approved :: MIT License', + 'Programming Language :: Python :: 2.7', + 'Programming Language :: Python :: 3.3', + 'Programming Language :: Python :: 3.4', + 'Programming Language :: Python :: 3.5', + 'Programming Language :: Python :: 3.6', ], keywords='', packages=find_packages(exclude=['docs', 'tests*']), From 4f5604530c273e5eba3a3f1e66801c8854a3c552 Mon Sep 17 00:00:00 2001 From: rachmadani haryono Date: Tue, 27 Feb 2018 14:56:35 +0800 Subject: [PATCH 02/83] fix newlines on table (#41) * chg: doc: rename README.md to README.rst * chg: doc: syntax from markdown to rst * chg: doc: command on readme * chg: doc: re add table * chg: doc: remove duplicate example * chg: doc: update table to latest upstream commit * chg: dev: update print_urls description * chg: dev: update contribute section to latest upstream commit * fix: doc: newline on table * fix: doc: line on table --- README.rst | 33 +++++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/README.rst b/README.rst index fb6aff11..a4631c71 100644 --- a/README.rst +++ b/README.rst @@ -48,45 +48,59 @@ Arguments | Argument | Short hand | Description | +==================+=============+===============================================================================================================================+ | keywords | k | Denotes the keywords/key phrases you want to search for and the directory file name. | +| | | | | | | Tips: | +| | | | | | | * If you simply type the keyword, Google will best try to match it | | | | * If you want to search for exact phrase, you can wrap the keywords in double quotes ("") | | | | * If you want to search to contain either of the words provided, use **OR** between the words. | | | | * If you want to explicitly not want a specific word use a minus sign before the word (-) | +------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | suffix_keywords | sk | Denotes additional words added after main keyword while making the search query. 
| +| | | | | | | Useful when you have multiple suffix keywords for one keyword. | +| | | | | | | The final search query would be: | +------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | limit | l | Denotes number of images that you want to download. | +------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | format | f | Denotes the format/extension that you want to download. | +| | | | | | | `Possible values: jpg, gif, png, bmp, svg, webp, ico` | +------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | color | c | Denotes the color filter that you want to apply to the images. | -| | | `Possible values: | -| | | red, orange, yellow, green, teal, blue, purple, pink, white, gray, black, brown` | +| | | | +| | | `Possible values`: | +| | | | +| | | `red, orange, yellow, green, teal, blue, purple, pink, white, gray, black, brown` | +------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | color_type | ct | Denotes the color type you want to apply to the images. | +| | | | | | | `Possible values: full-color, black-and-white, transparent` | +------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | usage_rights | r | Denotes the usage rights/licence under which the image is classified. | -| | | `Possible values: | -| | | * labled-for-reuse-with-modifications, | -| | | * labled-for-reuse, | -| | | * labled-for-noncommercial-reuse-with-modification, | -| | | * labled-for-nocommercial-reuse` | +| | | | +| | | `Possible values:` | +| | | | +| | | * `labled-for-reuse-with-modifications`, | +| | | * `labled-for-reuse`, | +| | | * `labled-for-noncommercial-reuse-with-modification`, | +| | | * `labled-for-nocommercial-reuse` | +------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | size | s | Denotes the relative size of the image to be downloaded. | +| | | | | | | `Possible values: large, medium, icon` | +------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | aspect_ratio | a | Denotes the aspect ration of images to download. | +| | | | | | | `Possible values: tall, square, wide, panoramic` | +------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | type | t | Denotes the type of image to be downloaded. | +| | | | | | | `Possible values: face,photo,clip-art,line-drawing,animated` | +------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | time | w | Denotes the time the image was uploaded/indexed. 
| +| | | | | | | `Possible values: past-24-hours, past-7-days` | +------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | delay | d | Time to wait between downloading two images | @@ -98,6 +112,7 @@ Arguments | output_directory | o | Allows you specify the main directory name. If not specified, it will default to 'downloads' | +------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | similar_images | si | Reverse Image Search. | +| | | | | | | Searches and downloads images that are similar to the image link/url you provide. | +------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | specific_site | ss | Allows you to download images with keywords only from a specific website/domain name you mention as indexed in Google Images. | @@ -193,7 +208,9 @@ Contribute Anyone is welcomed to contribute to this script. If you would like to make a change, open a pull request. For issues and discussion visit the -`Issue Tracker `__ +`Issue Tracker `__. + +The aim of this repo is to keep it simple, stand-alone, backward compatible and 3rd party dependency proof. Disclaimer ---------- From b130089a1dbd17521b53ce28f6ba3e6cdddb4d49 Mon Sep 17 00:00:00 2001 From: rachmadani haryono Date: Wed, 28 Feb 2018 12:46:05 +0800 Subject: [PATCH 03/83] fix: dev: ImportError on python2 (#42) --- google_images_download/__init__.py | 6 +++++- google_images_download/__main__.py | 7 +++++++ 2 files changed, 12 insertions(+), 1 deletion(-) create mode 100644 google_images_download/__main__.py diff --git a/google_images_download/__init__.py b/google_images_download/__init__.py index ca2c57fb..2d0a5746 100644 --- a/google_images_download/__init__.py +++ b/google_images_download/__init__.py @@ -1,5 +1,9 @@ +#!/usr/bin/env python +from __future__ import absolute_import + + def main(): import google_images_download.google_images_download if __name__ == '__main__': - main() \ No newline at end of file + main() diff --git a/google_images_download/__main__.py b/google_images_download/__main__.py new file mode 100644 index 00000000..c82f672d --- /dev/null +++ b/google_images_download/__main__.py @@ -0,0 +1,7 @@ +#!/usr/bin/env python +from __future__ import absolute_import + +from .__init__ import main + +if __name__ == '__main__': + main() From 6efc092efb35be7fad00e65a4c0f49ff84d5179e Mon Sep 17 00:00:00 2001 From: Hardik Vasa Date: Tue, 27 Feb 2018 21:48:33 -0800 Subject: [PATCH 04/83] added troubleshooting guide and corrected errors --- README.rst | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 50 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index a4631c71..460f6627 100644 --- a/README.rst +++ b/README.rst @@ -27,20 +27,34 @@ Installation $ pip install google_images_download -**Manually:** +**Manually using CLI:** :: $ git clone https://github.com/hardikvasa/google-images-download.git $ cd google-images-download && sudo python setup.py install +**Manually using UI:** + +Go to the `repo on github `__ ==> Click on 'Clone or Download' ==> Click on 'Download ZIP' and save it on your local disk. + Usage ----- +If installed via pip or using CLI, use the following command: + :: $ googleimagesdownload [Arguments...] 
+If downloaded via the UI, unzip the file downloaded, go to the 'google_images_download' directory and use one of the below commands: + +:: + + $ python3 google_images_download.py [Arguments...] + OR + $ python google_images_download.py [Arguments...] + Arguments ~~~~~~~~~ @@ -184,7 +198,7 @@ Examples - To download images which are similar to the image in the image URL that you provided (Reverse Image search). -``python3 pr.py -si -l 10`` +``googleimagesdownload -si -l 10`` - To download images from specific website or domain name for a given keyword @@ -195,13 +209,46 @@ Examples -------------- -SSL Errors +Troubleshooting ---------- +**## SSL Errors** + If you do see SSL errors on Mac for Python 3, please go to Finder —> Applications —> Python 3 —> Click on the ‘Install Certificates.command’ and run the file. +**## googleimagesdownload: command not found** + +While using the above commands, if you get ``Error: -bash: googleimagesdownload: command not found`` then you have to set the correct path variable. + +To get the details of the repo, run the following command: +:: + $ pip show -f google_images_download + +you will get the result like this: +:: + Location: /Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages + Files: + ../../../bin/googleimagesdownload + +together they make: ``/Library/Frameworks/Python.framework/Versions/2.7/bin`` which you need add it to the path: +:: + $ export PATH="/Library/Frameworks/Python.framework/Versions/2.7/bin + + +**## [Errno 13] Permission denied creating directory 'downloads'** + +When you run the command, it downloads the images in the current directory (the directory from where you are running the command). If you get permission denied error for creating the `downloads directory`, then move to a directory in which you have the write permission and then run the command again. + + +**## Permission denied while installing the library** + +On MAC and Linux, when you get permission denied when installing the library using pip, try using shifting to sudo user and run the command. +:: + $ sudo pip install google_images_download + + Contribute ---------- From cb2f5adb1f5f61a6e0c2b5df5ce2ecae51fa54a3 Mon Sep 17 00:00:00 2001 From: Vasa Date: Tue, 27 Feb 2018 22:39:12 -0800 Subject: [PATCH 05/83] config changes --- LICENSE.md | 21 --------------------- setup.cfg | 2 +- setup.py | 2 +- 3 files changed, 2 insertions(+), 23 deletions(-) delete mode 100644 LICENSE.md diff --git a/LICENSE.md b/LICENSE.md deleted file mode 100644 index 3361c0b2..00000000 --- a/LICENSE.md +++ /dev/null @@ -1,21 +0,0 @@ -The MIT License (MIT) - -Copyright (c) 2015-2018 Hardik Vasa - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/setup.cfg b/setup.cfg index cb4a338e..c26b8326 100644 --- a/setup.cfg +++ b/setup.cfg @@ -2,4 +2,4 @@ universal=1 [metadata] -description-file=README.md \ No newline at end of file +description-file=README.rst \ No newline at end of file diff --git a/setup.py b/setup.py index 141e003c..65211e08 100644 --- a/setup.py +++ b/setup.py @@ -41,7 +41,7 @@ author='Hardik Vasa', install_requires=install_requires, dependency_links=dependency_links, - author_email='psuzzn@gmail.com', + author_email='hnvasa@gmail.com', entry_points={ 'console_scripts': [ 'googleimagesdownload = google_images_download.__init__:main' From 2ae6c19bf12e937d38e60fa0b4c7cbb01634249f Mon Sep 17 00:00:00 2001 From: Vasa Date: Tue, 27 Feb 2018 22:55:19 -0800 Subject: [PATCH 06/83] reverse image search issue in #43 resolved --- google_images_download/google_images_download.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py index bee2dd4a..0d3c53ca 100644 --- a/google_images_download/google_images_download.py +++ b/google_images_download/google_images_download.py @@ -72,6 +72,9 @@ else: limit = 100 +if args.similar_images: + search_keyword = [] + # If single_image or url argument not present then keywords is mandatory argument if args.single_image is None and args.url is None and args.similar_images is None and args.keywords is None: parser.error('Keywords is a required argument!') From bb94baf846cf131b5d110f63b1c02d63e9e876f4 Mon Sep 17 00:00:00 2001 From: Vasa Date: Sun, 4 Mar 2018 17:03:41 -0800 Subject: [PATCH 07/83] algorithm revamp where the script starts downloading as soon as it searches feature to print image sizes feature to print and save image metadata extract metadata from the raw html as a JSON object OSError in windows as : are not permitted feature to select socket timeouts for downloading images other minor changes in the algorithm --- .../google_images_download.py | 434 +++++++++++------- 1 file changed, 258 insertions(+), 176 deletions(-) diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py index 0d3c53ca..f0113fc2 100644 --- a/google_images_download/google_images_download.py +++ b/google_images_download/google_images_download.py @@ -12,6 +12,7 @@ from urllib.request import Request, urlopen from urllib.request import URLError, HTTPError from urllib.parse import quote + import html else: # If the Current Version of Python is 2.x import urllib2 from urllib2 import Request, urlopen @@ -22,6 +23,8 @@ import argparse import ssl import datetime +import json +import re # Taking command line arguments from users parser = argparse.ArgumentParser() @@ -51,6 +54,10 @@ parser.add_argument('-si', '--similar_images', help='downloads images very similar to the image URL you provide', type=str, required=False) parser.add_argument('-ss', '--specific_site', help='downloads images that are indexed from a specific website', type=str, required=False) parser.add_argument('-p', '--print_urls', default=False, help="Print the URLs of the images", action="store_true") +parser.add_argument('-ps', '--print_size', default=False, help="Print the size of the images on disk", action="store_true") 
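+# New options below: --metadata prints the parsed metadata of each downloaded image,
+# --extract_metadata additionally dumps that metadata to a text file under logs/,
+# and --socket_timeout bounds how long urlopen() waits for each image (default 15 seconds).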
+parser.add_argument('-m', '--metadata', default=False, help="Print the metadata of the image", action="store_true") +parser.add_argument('-e', '--extract_metadata', default=False, help="Dumps all the logs into a text file", action="store_true") +parser.add_argument('-st', '--socket_timeout', default=False, help="Connection timeout waiting for the image to download", type=float) args = parser.parse_args() @@ -72,8 +79,13 @@ else: limit = 100 +if args.url: + current_time = str(datetime.datetime.now()).split('.')[0] + search_keyword = [current_time.replace(":", "_")] + if args.similar_images: - search_keyword = [] + current_time = str(datetime.datetime.now()).split('.')[0] + search_keyword = [current_time.replace(":", "-")] # If single_image or url argument not present then keywords is mandatory argument if args.single_image is None and args.url is None and args.similar_images is None and args.keywords is None: @@ -98,6 +110,12 @@ print_url = 'yes' else: print_url = 'no' + +if args.print_size: + print_size = 'yes' +else: + print_size = 'no' + #------ Initialization Complete ------# # Downloading entire Web Document (Raw Page Content) @@ -129,35 +147,77 @@ def download_page(url): except: return "Page Not found" +#Correcting the escape characters for python2 +def replace_with_byte(match): + return chr(int(match.group(0)[1:], 8)) + +def repair(brokenjson): + invalid_escape = re.compile(r'\\[0-7]{1,3}') # up to 3 digits for byte values up to FF + return invalid_escape.sub(replace_with_byte, brokenjson) + + +#Format the object in readable format +def format_object(object): + formatted_object = {} + formatted_object['image_format'] = object['ity'] + formatted_object['image_height'] = object['oh'] + formatted_object['image_width'] = object['ow'] + formatted_object['image_link'] = object['ou'] + formatted_object['image_description'] = object['pt'] + formatted_object['image_host'] = object['rh'] + formatted_object['image_source'] = object['ru'] + formatted_object['image_thumbnail_url'] = object['tu'] + return formatted_object + +#make directories +def create_directories(main_directory,dir_name): + # make a search keyword directory + try: + if not os.path.exists(main_directory): + os.makedirs(main_directory) + time.sleep(0.2) + path = str(dir_name) + sub_directory = os.path.join(main_directory, path) + if not os.path.exists(sub_directory): + os.makedirs(sub_directory) + else: + path = str(dir_name) + sub_directory = os.path.join(main_directory, path) + if not os.path.exists(sub_directory): + os.makedirs(sub_directory) + except OSError as e: + if e.errno != 17: + raise + # time.sleep might help here + pass + return -# Finding 'Next Image' from the given raw page -def _images_get_next_item(s): - start_line = s.find('rg_di') - if start_line == -1: # If no links are found then give an error! - end_quote = 0 - link = "no_links" - return link, end_quote +#function to download single image +def single_image(): + url = args.single_image + try: + os.makedirs(main_directory) + except OSError as e: + if e.errno != 17: + raise + pass + req = Request(url, headers={ + "User-Agent": "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17"}) + response = urlopen(req, None, 10) + image_name = str(url[(url.rfind('/')) + 1:]) + if '?' 
in image_name: + image_name = image_name[:image_name.find('?')] + if ".jpg" in image_name or ".gif" in image_name or ".png" in image_name or ".bmp" in image_name or ".svg" in image_name or ".webp" in image_name or ".ico" in image_name: + output_file = open(main_directory + "/" + image_name, 'wb') else: - start_line = s.find('"class="rg_meta"') - start_content = s.find('"ou"', start_line + 1) - end_content = s.find(',"ow"', start_content + 1) - content_raw = str(s[start_content + 6:end_content - 1]) - return content_raw, end_content - - -# Getting all links with the help of '_images_get_next_image' -def _images_get_all_items(page): - items = [] - while True: - item, end_content = _images_get_next_item(page) - if item == "no_links": - break - else: - items.append(item) # Append all the links in the list named 'Links' - time.sleep(0.1) # Timer could be used to slow down the request for image downloads - page = page[end_content:] - return items + output_file = open(main_directory + "/" + image_name + ".jpg", 'wb') + image_name = image_name + ".jpg" + data = response.read() + output_file.write(data) + response.close() + print("completed ====> " + image_name) + return def similar_images(): version = (3, 0) @@ -184,7 +244,7 @@ def similar_images(): urll2 = content[l3 + 19:l4] return urll2 except: - return "Cloud not connect to Google Imagees endpoint" + return "Cloud not connect to Google Images endpoint" else: # If the Current Version of Python is 2.x try: searchUrl = 'https://www.google.com/searchbyimage?site=search&sa=X&image_url=' + args.similar_images @@ -208,7 +268,7 @@ def similar_images(): urll2 = content[l3 + 19:l4] return(urll2) except: - return "Cloud not connect to Google Imagees endpoint" + return "Cloud not connect to Google Images endpoint" #Building URL parameters def build_url_parameters(): @@ -235,172 +295,194 @@ def build_url_parameters(): counter += 1 return built_url -#function to download single image -def single_image(): - url = args.single_image - try: - os.makedirs(main_directory) - except OSError as e: - if e.errno != 17: - raise - # time.sleep might help here - pass - req = Request(url, headers={ - "User-Agent": "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17"}) - response = urlopen(req, None, 10) - image_name = str(url[(url.rfind('/')) + 1:]) - if '?' 
in image_name: - image_name = image_name[:image_name.find('?')] - if ".jpg" in image_name or ".gif" in image_name or ".png" in image_name or ".bmp" in image_name or ".svg" in image_name or ".webp" in image_name or ".ico" in image_name: - output_file = open(main_directory + "/" + image_name, 'wb') +#building main search URL +def build_search_url(search_term,params): + # check the args and choose the URL + if args.url: + url = args.url + elif args.similar_images: + keywordem = similar_images() + url = 'https://www.google.com/search?q=' + keywordem + '&espv=2&biw=1366&bih=667&site=webhp&source=lnms&tbm=isch&sa=X&ei=XosDVaCXD8TasATItgE&ved=0CAcQ_AUoAg' + elif args.specific_site: + url = 'https://www.google.com/search?q=' + quote( + search_term) + 'site:' + args.specific_site + '&espv=2&biw=1366&bih=667&site=webhp&source=lnms&tbm=isch' + params + '&sa=X&ei=XosDVaCXD8TasATItgE&ved=0CAcQ_AUoAg' else: - output_file = open(main_directory + "/" + image_name + ".jpg", 'wb') - image_name = image_name + ".jpg" + url = 'https://www.google.com/search?q=' + quote( + search_term) + '&espv=2&biw=1366&bih=667&site=webhp&source=lnms&tbm=isch' + params + '&sa=X&ei=XosDVaCXD8TasATItgE&ved=0CAcQ_AUoAg' + return url + +#measures the file size +def file_size(file_path): + if os.path.isfile(file_path): + file_info = os.stat(file_path) + size = file_info.st_size + for x in ['bytes', 'KB', 'MB', 'GB', 'TB']: + if size < 1024.0: + return "%3.1f %s" % (size, x) + size /= 1024.0 + return size + +# Download Images +def download_image(image_url,image_format,main_directory,dir_name,count): + if args.print_urls: + print("Image URL: " + image_url) + try: + req = Request(image_url, headers={ + "User-Agent": "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17"}) + try: + # timeout time to download an image + if args.socket_timeout: + timeout = float(args.socket_timeout) + else: + timeout = 15 + response = urlopen(req, None, timeout) + + # keep everything after the last '/' + image_name = str(image_url[(image_url.rfind('/')) + 1:]) + image_name = image_name.lower() + # if no extension then add it + # remove everything after the image name + if image_format == "": + image_name = image_name + "." + "jpg" + else: + image_name = image_name[:image_name.find(image_format) + 3] - data = response.read() - output_file.write(data) - response.close() - print("completed ====> " + image_name) - return + path = main_directory + "/" + dir_name + "/" + str(count) + ". " + image_name + output_file = open(path, 'wb') + data = response.read() + output_file.write(data) + response.close() + + #image size parameter + if args.print_size: + print("Image Size: " + str(file_size(path))) + + download_status = 'success' + download_message = "Completed ====> " + str(count) + ". " + image_name + + except UnicodeEncodeError as e: + download_status = 'fail' + download_message = "UnicodeEncodeError on an image...trying next one..." + " Error: " + str(e) + + except HTTPError as e: # If there is any HTTPError + download_status = 'fail' + download_message = "HTTPError on an image...trying next one..." + " Error: " + str(e) + except URLError as e: + download_status = 'fail' + download_message = "URLError on an image...trying next one..." + " Error: " + str(e) -def bulk_download(search_keyword,suffix_keywords,limit,main_directory,delay_time,print_url): + except ssl.CertificateError as e: + download_status = 'fail' + download_message = "CertificateError on an image...trying next one..." 
+ " Error: " + str(e) + + except IOError as e: # If there is any IOError + download_status = 'fail' + download_message = "IOError on an image...trying next one..." + " Error: " + str(e) + return download_status,download_message + + +# Finding 'Next Image' from the given raw page +def _get_next_item(s): + start_line = s.find('rg_di') + if start_line == -1: # If no links are found then give an error! + end_quote = 0 + link = "no_links" + return link, end_quote + else: + start_line = s.find('class="rg_meta notranslate">') + start_object = s.find('{', start_line + 1) + end_object = s.find('', start_object + 1) + object_raw = str(s[start_object:end_object]) + #####print(object_raw) + #remove escape characters based on python version + version = (3, 0) + cur_version = sys.version_info + if cur_version >= version: #python3 + object_decode = bytes(object_raw, "utf-8").decode("unicode_escape") + final_object = json.loads(object_decode) + else: #python2 + final_object = (json.loads(repair(object_raw))) + return final_object, end_object + + +# Getting all links with the help of '_images_get_next_image' +def _get_all_items(page,main_directory,dir_name,limit): + items = [] errorCount = 0 - if args.url: - search_keyword = [str(datetime.datetime.now()).split('.')[0]] - if args.similar_images: - search_keyword = [str(datetime.datetime.now()).split('.')[0]] + i = 0 + count = 1 + while count < limit+1: + object, end_content = _get_next_item(page) + if object == "no_links": + break + else: + #format the item for readability + object = format_object(object) + if args.metadata: + print("\nImage Metadata" + str(object)) + + items.append(object) # Append all the links in the list named 'Links' + + #download the images + download_status,download_message = download_image(object['image_link'],object['image_format'],main_directory,dir_name,count) + print(download_message) + if download_status == "success": + count += 1 + else: + errorCount += 1 + + #delay param + if args.delay: + time.sleep(int(args.delay)) + + page = page[end_content:] + i += 1 + if count < limit: + print("\n\nUnfortunately all " + str( + limit) + " could not be downloaded because some images were not downloadable. 
" + str( + count-1) + " is all we got for this search filter!") + return items,errorCount + +# Bulk Download +def bulk_download(search_keyword,suffix_keywords,limit,main_directory): # appending a dummy value to Suffix Keywords array if it is blank if len(suffix_keywords) == 0: suffix_keywords.append('') - for sky in suffix_keywords: + for sky in suffix_keywords: # 1.for every suffix keywords i = 0 - while i < len(search_keyword): - items = [] + while i < len(search_keyword): # 2.for every main keyword iteration = "\n" + "Item no.: " + str(i + 1) + " -->" + " Item name = " + str(search_keyword[i] + str(sky)) print(iteration) print("Evaluating...") search_term = search_keyword[i] + sky - dir_name = search_term + ('-' + args.color if args.color else '') + dir_name = search_term + ('-' + args.color if args.color else '') #sub-directory - # make a search keyword directory - try: - if not os.path.exists(main_directory): - os.makedirs(main_directory) - time.sleep(0.2) - path = str(dir_name) - sub_directory = os.path.join(main_directory, path) - if not os.path.exists(sub_directory): - os.makedirs(sub_directory) - else: - path = str(dir_name) - sub_directory = os.path.join(main_directory, path) - if not os.path.exists(sub_directory): - os.makedirs(sub_directory) - except OSError as e: - if e.errno != 17: - raise - # time.sleep might help here - pass - - params = build_url_parameters() - # color_param = ('&tbs=ic:specific,isc:' + args.color) if args.color else '' - # check the args and choose the URL - if args.url: - url = args.url - elif args.similar_images: - keywordem = similar_images() - url = 'https://www.google.com/search?q=' + keywordem + '&espv=2&biw=1366&bih=667&site=webhp&source=lnms&tbm=isch&sa=X&ei=XosDVaCXD8TasATItgE&ved=0CAcQ_AUoAg' - elif args.specific_site: - url = 'https://www.google.com/search?q=' + quote( - search_term) + 'site:' + args.specific_site + '&espv=2&biw=1366&bih=667&site=webhp&source=lnms&tbm=isch' + params + '&sa=X&ei=XosDVaCXD8TasATItgE&ved=0CAcQ_AUoAg' - else: - url = 'https://www.google.com/search?q=' + quote( - search_term) + '&espv=2&biw=1366&bih=667&site=webhp&source=lnms&tbm=isch' + params + '&sa=X&ei=XosDVaCXD8TasATItgE&ved=0CAcQ_AUoAg' - raw_html = (download_page(url)) - time.sleep(0.1) - items = items + (_images_get_all_items(raw_html)) - print("Total Image Links = " + str(len(items))) - - #If search does not return anything, do not try to force download - if len(items) <= 1: - print('***** This search result did not return any results...please try a different search filter *****') - break + create_directories(main_directory,dir_name) #create directories in OS + + params = build_url_parameters() #building URL with params + + url = build_search_url(search_term,params) #building main search url + + raw_html = (download_page(url)) #download page print("Starting Download...") + items,errorCount = _get_all_items(raw_html,main_directory,dir_name,limit) #get all image items and download images - k = 0 - success_count = 0 - while (k < len(items)): # items ==> URLs + #dumps into a text file + if args.extract_metadata: try: - image_url = items[k] - - if print_url == 'yes': - print("\n" + str(image_url)) - - req = Request(image_url, headers={ - "User-Agent": "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17"}) - try: - response = urlopen(req, None, 15) - image_name = str(items[k][(items[k].rfind('/')) + 1:]) - if '?' 
in image_name: - image_name = image_name[:image_name.find('?')] - if ".jpg" in image_name or ".JPG" in image_name or ".gif" in image_name or ".png" in image_name or ".bmp" in image_name or ".svg" in image_name or ".webp" in image_name or ".ico" in image_name: - output_file = open(main_directory + "/" + dir_name + "/" + str(success_count + 1) + ". " + image_name, 'wb') - else: - if args.format: - output_file = open( - main_directory + "/" + dir_name + "/" + str(success_count + 1) + ". " + image_name + "." + args.format, - 'wb') - image_name = image_name + "." + args.format - else: - output_file = open( - main_directory + "/" + dir_name + "/" + str(success_count + 1) + ". " + image_name + ".jpg", 'wb') - image_name = image_name + ".jpg" - - data = response.read() - output_file.write(data) - response.close() - - print("Completed ====> " + str(success_count + 1) + ". " + image_name) - k = k + 1 - success_count += 1 - if success_count == limit: - break - - except UnicodeEncodeError as e: - errorCount +=1 - print ("UnicodeEncodeError on an image...trying next one..." + " Error: " + str(e)) - k = k + 1 - - except HTTPError as e: # If there is any HTTPError - errorCount += 1 - print("HTTPError on an image...trying next one..." + " Error: " + str(e)) - k = k + 1 - - except URLError as e: - errorCount += 1 - print("URLError on an image...trying next one..." + " Error: " + str(e)) - k = k + 1 - - except ssl.CertificateError as e: - errorCount += 1 - print("CertificateError on an image...trying next one..." + " Error: " + str(e)) - k = k + 1 - - except IOError as e: # If there is any IOError - errorCount += 1 - print("IOError on an image...trying next one..." + " Error: " + str(e)) - k = k + 1 - - if args.delay: - time.sleep(int(delay_time)) - - if success_count < limit: - print("\n\nUnfortunately all " + str(limit) + " could not be downloaded because some images were not downloadable. " + str(success_count) + " is all we got for this search filter!") - i = i + 1 + if not os.path.exists("logs"): + os.makedirs("logs") + except OSError as e: + print(e) + text_file = open("logs/"+search_keyword[i]+".txt", "w") + text_file.write(json.dumps(items, indent=4, sort_keys=True)) + text_file.close() + + i += 1 return errorCount #------------- Main Program -------------# @@ -408,7 +490,7 @@ def bulk_download(search_keyword,suffix_keywords,limit,main_directory,delay_time single_image() else: # or download multiple images based on keywords/keyphrase search t0 = time.time() # start the timer - errorCount = bulk_download(search_keyword,suffix_keywords,limit,main_directory,delay_time,print_url) + errorCount = bulk_download(search_keyword,suffix_keywords,limit,main_directory) print("\nEverything downloaded!") print("Total Errors: " + str(errorCount) + "\n") From 9827c8a95280ca7767155604b373ddb97cf1c9c9 Mon Sep 17 00:00:00 2001 From: Vasa Date: Sun, 4 Mar 2018 17:07:19 -0800 Subject: [PATCH 08/83] doc update on new command line arguments added flowchart showcasing the structure of the code edited recommendation of using sudo for permission errors when downloading repo with pip --- README.rst | 26 +++++++++++++++++++++++--- images/flow-chart.png | Bin 0 -> 33582 bytes 2 files changed, 23 insertions(+), 3 deletions(-) create mode 100644 images/flow-chart.png diff --git a/README.rst b/README.rst index 460f6627..a1df6244 100644 --- a/README.rst +++ b/README.rst @@ -14,13 +14,15 @@ Many more features will be added to it going forward. 
Compatability ------------- -This program is compatible with both the versions of python (2.x and 3.x). +This program is compatible with both the versions of python - 2.x and 3.x (recommended). It is a download-and-run program with no changes to the file. You will just have to specify parameters through the command line. Installation ------------ +You can use **one of the below methods** to download and use this repository. + **Using pip:** :: @@ -133,6 +135,15 @@ Arguments +------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | print_urls | p | Print the URLs of the imageson the console. These image URLs can be used for debugging purposes | +------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| print_size | ps | Prints the size of the image on the console | ++------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| metadata | m | Prints the metada of the image. This includes image size, origin, image attributes, description, image URL, etc. | ++------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| extract_metadata | e | Metadata of all the downloaded images is stored in a text file. This file can be found in the ``logs/`` directory | ++------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| socket_timeout | st | Allows you to specify the time to wait for socket connection. | +| | | You could specy a higher timeout time for slow internet connection. The default value is 15 seconds. | ++------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | help | h | show the help message regarding the usage of the above arguments | +------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ @@ -244,10 +255,19 @@ When you run the command, it downloads the images in the current directory (the **## Permission denied while installing the library** -On MAC and Linux, when you get permission denied when installing the library using pip, try using shifting to sudo user and run the command. +On MAC and Linux, when you get permission denied when installing the library using pip, try doing a user install. :: - $ sudo pip install google_images_download + $ pip install google_images_download --user + +You can also run pip install as a superuser with ``sudo pip install google_images_download`` but it is not generally a good idea because it can cause issues with your system-level packages. + +Structure +--------- + +Below diagram represents the code logic. +.. 
figure:: images/flow-chart.png + :alt: Contribute ---------- diff --git a/images/flow-chart.png b/images/flow-chart.png new file mode 100644 index 0000000000000000000000000000000000000000..cdcce3137c71afc8b6d32b8e01efc79948d28a42 GIT binary patch literal 33582 zcmeFYWmH?;wm*zJ1b3$eT3m_-DN>*X(iU3WHO1YnMT%RoV#P{Z+&wtKp-|l2C1?l` z-aOAe_uMnaJ@)#Q%0+w<3MB zy)0)lnsqbQWV4-#F25X=lAIj&6fI$7j6w<9TZDo3^pA$a@z5HdN~L}K zSCK~SLo=)3OGc4l1EcLMQlak(i+v#c_+VZ9bwXfVk8#XNVKjyX^bmJWfhAp}1BEza zsuNxhJ*kLNMwPL}Z204Zn6N8b2>}6aYocPjZKc>7mGc|DwLFW!_eE}>`dpsr?9#|M zCJ&wiKGMFY_bp_V;WQqkkVzhkCXCP}S$8zEcJH5^#l_x}UE2`Dc=IK_Mz+UE3mf_7 zEra?q0|{mt>j7)E?T>ZAKHEi5td~%8d_yyZT?t&0nHkU^`iApOmBpf)v3yWyi zYIf*h)d$D-@?S*9W}lk{UchIUfx4y>K+||(-OfY{x3-~y=$J&;?l+_a{=ovK7-aTO zOiB9DTFX1PpTj}(8vtE-v+ zT8=|Ue;o`or1~IFxQlK<8&B+vR^CAY4>2W0H&Usb<@0GG)`niIT_H1&Dnj zsAM|C@GzB=!20@}K>~X;Wat}%A+DF1(ZwekQ@mD84!n;Er1T0Ze@GpA(1aB2Nx6EM zV2TQf+>glAUQCe6vMW%t4<(WpQvQ_pWWVgM=oi|gbS2w+?#Ka1MBjXNhTr@8>93d% z`Qf{ifAB9A6vt`ZIlqQ)=VR{@xbx}9O@F%`=e(fwrE813$`cqjBQmX~#hmQq733Pm zjqXUQ)K(AX>x-%qF2#`!uIoDSr#k4ysfw2%E8b9F#F(~bWsa_a8Tt!Ba4I?uPC4(0 z0;z&N1J+FNML*(n#;hvfu|Fc6z;nY_4i)ZV?t0Uu*XW4AKl!Z0{>fI!J@i00G5xur zyrJUa3;Rd)6n>xMBfB4wtHoNraOF@=&@BYMB?I$-S@s_NO#1^u$frKGOLpi-Zaxyu zHB{y2IC-4QdYzP;sA5TJNn~kg$!;kjd~P*8aF_~CX=0xg5K(wjNc(1yTj&eXpzuq{ zEwnA(E#|G*($doM(wak2y`00hbFOors%b1)1_KM9zR~z8`-e`O%!98kbzZYgBS5j> zHxW3e_K`lb17BH==hv&?tLJh2Hcx0CK@$zwb)L*Tn&G*AYy4tae)&~ks(*-o#+}&> z&)FveVS;c1Spwd$UV;k28txi-7P!T~#YXj*N;*X%#ph|tQ|LPcEp2xK zSx>)tsRg6?qN)GaLdgL;a<7SfHC)b1P2UPBy(z7f68+NoAHGJH$@E$yAU-XJPx>|U z0Um}++5uk?hL;~MYxX}Sr*O4sT7&p={B6&eh_$=5IbK?X^zp}P#+qsHji8M%j+A{Z zeui5|tDmD!r=MA?X-jN7S+`W@GoM)t`tZ=HZRRUflfE@9GR($AoGs6kII5Y=pG{`O zv821Cu=HZ-)|XO-t4+I2!Zo7960r@=gpIwUc@`N%<0S}=-la)g`g6kOFGW1ibsUMNa`)3D)O00n?;oJkhYZ@ zu8#NYkyIi3=9?ejAFQ>{h=>VE7HB;M7Fn}cmKo96KeCIxSd15q8jihlpm)*VtQ`b{ z3Q7y8Uu4IBR~d_2X1M3?leRV*UeevrePgeZ<-Zuyh-kcqL10C&0DLWS>c@tf+MvzH z^^X#h)|0Lt5reKXAwIqjM7MOxB96nNUnQNkd-e8fs;!xF`Q$R6-8u`iNy_>-o!Pv{ zDhYpY$vO_ZV zu8gi!pq2OV9wl9LAi2!2*I30cD#AM+ulYos!_TF5GDl5h9`r(jIt2(%u@AAC2tBCz z4dBj+Gx^!kiM>yvrBiqL^r{Mjz#S9KR$r_#h5`j@pYrngw*Uiu4hTl5Ug-dlPgaY= z*_+s({~k#x=V_?u(t*AAD62PW^SGQb*Mt?3mn2mujoHoG@!#9uyWNZx*lEppmbvNb zwBE)q?B{zHwOjX&w&yYhQs+_}Rutyg=O=WS^_Q#P1sx#D za(O0r%5{8o#NLCo%gTM*V{6J9=4%igo+C$(7vzpsj~eSlOl`DKUOLCWUVdi;@`4`8 zK15FnT>AaF{&INpcBta0B>I=M)`_l3yN<<4GfwUEKZ2D`eURgO$gKg1F5~Q6zb&sM z9!V4l5@vT+Ax;v;xG!<&4(}lQ#PLpLPbE4t;roq-;>g}e0AD1Z4D@5XM{RRG^Zm$K z!@O8_{ouw^=uGyvEL|7#3(L-dwX3YFPAzaTv)Ax(qo35;MV-jxsCzcCq~BWBL(}9= z+h}g^OYD@XH*ADsji9ESH)S>YeHH1-%B7rGa)4SFb&puD11Cg6kn>Ti}nIhygkJ zTSi2j+(p3b4L9QV67&!%kb{@&e{2H#a#eenaaDuZ5gS{;xx;OKNJpwXL}$@(d{Pd2 ziqyNDfNv$X10VJoU()UeJ@$Tv@Eu{O6R&eRTJ|JTA(8V=yLa3SfZ^5wJXo@2a~>!k z_U8H*#|p=`Ow=K@fy6hyPNIJ9q7jcx)#5N{?QK6iMXRtw|Ms}6Mo}D%QgUzU75hX> z5oO3jXwkJuqy*aQ1AO_zw`dA$bVc-Hn4f>1^2z!_y)phwd*;VR3QkSG*pDV~5)1_t z6gY{Zv({mpDhoq}) zy!jm6IR0yp{~kxl%FV(R?CcJ9a%BB$Tr+bg4|f@M_P-eY^Y>qJT6u&2hm)h*Kc-+RPSF% zB?bSL@?Tp1K?xN2YX<)^qyK{I@2e=hWC?%*|HNLF;28~5B1#>MU?q??YLD@kZcslH z)W-dfJ?dCFeR7|q3w1Myrl#~<+Z+AR2)~g@Iiu|G5gTc0Wf)##Fh`6ask4<9nYL0m z-;Zw+-4X;bQTZZL!-7(A!o!t~Lc$UsKXVWL;Em(PAZV-%)xaVTRjMZ?SMAS9IQI4 z#=j;1?}_wR{gC~?5dM2Cc}J{G-C^PktpBF?x3VW#yb1V^=!Goe^;e}c2;=`-@_$YQ znveL;=rxfgSO|$*lK+$9-^vxIeEGNV|L0h)9IQ}t$NoqEaq52=4(5ME@BbOj|A}zu z|1+Hb5WW8YH=GbLj!NsJ=;IKT9G7@mSk~?26Ny3~zFr=6C?#P?^# zh(7l%5HE&VW>90^wlx#zl9egy(z@Uq{lpP^B<3b<-_YdO8^gEgSTWz{AZliq!<+IR z&m9XLrlDfWUrP$p$a8*L!_Ur&++&CHQS%|eWD=41f4gG`DJqsQVnpzV$_wEE5k2N! 
z8z;;zUUx|R>)R&6#pnZZ%H%57C)gMpeQ_>9kn?)ST)Et}WduS;d9;u3RbJYcA6yvg znI9Tj;NRzu77pqt9xi+d#b!+ICK*|ls4)KW)sR3G4{Z=B1~yxz`^|^zWD^%xW(~c2)nNaDK?@o2#Rto-QGMb|p1`%s_jyBEa_>OR5foGmy z$_?A5W$q8Fy^Hs9b9|R&E@rzL18!}TpUhv6V;XNPH?8D0{nX<0Y{!`Rc82%a>DOe> zdggwumC*-wdC9rkh=Mpyu^fYfl=R^d zn%0^v3>3zo^Nx_c()CixOY35K&gnSwJT%Qe%I zL4LzHr%GQ&Od0_@2ZwiD9^__J8cR;_F_GM zeNA1@{Z(gccS-h88=hI{iy5-Abj{^8G7{w6-ntR*IMR%aWgqvSw@zMS4iZ~BqHOo5 zZ}QW-7`fCCU;Y8xo`bts_!WG7Hbve__3-`qreL61Fa5Rmg#qq)!3r>)=19KnI=NMK^{h<9r$nJg0 z!b#9#_T^FLVk=i}yj@&&qzX%QgrCku+l521!&?ziZ6nr7)8E3^$p*}>I>tdx7F-;V zPIyMUl%S_DVbM0KVwnBguZgja{ovqhz>e__wQNI|1Gz)ULl5k>>AKZw#ul2NV=gK` zp@ta(u^n2U62I@vzFsm|thPP4pEj^R90gv!Zn!OJuq*r>Nvf(K@1GnRv#y9TBZVK_ zqtb%CWRZ&r zmjWw(hz+$sJk!K%n*`_l%OHdbNmNYQQc@C+F`iOi4E)8?w`W*A;$>@`KcAyy`lP$d z%Yw0<>+k)+sF;l$~D<{z8}CJQF5y zvMwGAOXurED?z(C#}?2do`_o93dU$=)@DkKm=2$`#dZFs?Q%G-K60(6p}7FCob7aA zh_|V*I+(k(GkP~23j?(HuU2LpQ?Semq_vXkFrX4n-9p1;9mR?}J#3jDbT_$hq$76J zD`#bwh}f?YMm$>uA#bOSM{#pLi-&ixg&IW>D6k4(%(mg-K4boN*tS6AHM-Ic)yR>M zbrMU_5P+InWxl6kc!F`Y8F)DR`}h@0P_duE#epe-f@vY%9Xn#GYAj&uVG(gDpxk0o z=jKxA{a)GFUn?b*S4XBteCtDe^#=~*6T6{{dSetYj_prv9~LK4j=u!mn%zZ@!Cg{o zvz>S0j_x zV%6+0;ownu%dsl}cpALm(6F>PoFPIu_$)}w`6+U0FRg_uM{nP_{z%&kDaW)|zwlx5 zj!sPh&l?YTe@J*-7<6xP4gXl=*e9m7)X*;RYP1)xU|xbcO~B+h$M0COp_kgA@rl;g zhus%BwZ?KhQ(K73gC#|P%w}aredQ2;OQR0tEXDD{Hh^PX5Ec)A))+=q%@o0sAb+hM+qcYMJojX$3HQnSV>VQQ!hGfNv`G~ia{uv?IHJk|+6>!( zw%}86I=gR;n)vQL7PlTwZ#1^aatXgp3%W^rJMB`O?HM(bqgAXKdo=9G9XXR*m6|Ey zl=yd5;gl5^x?n%4w)9%r>OE!1V}Vjvt{_+hYK3r2>d>%9zx=pDd8V(ban3rkspwT& zGw-7`E(ba&K^8VHv$yG?gzCh}xFWzq4)Hslk<%jVIhL_5WS+@y-fO9A=e@+;>!F`4 zvq7idM?>X1@v{5|D%M%t=X|4MmjK9f?}oiEk=SX1qAl-A-ZvR}EqU~J<7m1}>)AAc zf)JC8Q#Cu@zk<#NxS)u%sVZ*B-rVt2a+KFsCj}BTmPWN?&Uednt20f!GwZq4wMsG< z+#iE-eD|L;++Qt-W81giU%A=j26*at4L*D4j3s`HK+pr!hU_bjs-Vm;dP?c1J0k}V zRncVH(hb~!KeoQ~uYGcQYF@mqYyy0**}KU7l9dp{nLlv%f~7~A1~$@Lp+a)#VsV@O zpto6)`dutenWG7%#-$~{lQxY7A<+DHI*&EZ`rMPY>*b7BfykR3CdQux_jvy6Lvo%C z!-2zXs64g71wdTdT*i2^2gg6GNABOdQ8(US`8G+P)I>CtNd!(Tl_pe6t$p2NY@Q*E zQ-*2$*rJ@_$C4Tl*JDKcVUrrscGOjHd=nM4zlR5FuW)A?PSk3D!MU+yAa?sI$7XOn2;AAH=}bgh%Y746Et*p6-F)7Mgf;oj?<-jihshp&Hns&qqx5B?Q&lR zfXe-<8t7x_ZGqO$Rp0QOd-SM!@H79NFT4i$4XQYiiQg2j0z6=0N|MzD#EYl17V*6R zD$^gMIWh}O?&Hs4<&Kin3m6ZT>b>IKtZ$TXNp~q%;8&_|r(Uet7-Xw*;}e!>T!;y zfht#$ZV7?Qanf4PuZj@<^D1)#X$=-A)Qd1Y6 z(@iqI^>hJx-VhV`?l8hcrqcoV6Mlk4SCRJ%etKOCo(!L&Ure|no;Oo?9BZobVJ@p^ zJu}Jk>(?Js_z*Q$G&pJeW{YhrVasszVfEey6Ek?=ZRl(;`?SYb{Jy@0X=wMrv)3nGrlmF6FiCl8*thc%$P&HE^xa_4l3H=}D2 zEXx-hZPC@Mnu6DMhD{%c z83cqWU9WxuG;4@2i6Y<4ocQlZSIM#@e<$`jHJPTmNW9X$gGB z?0P2j7&-p}%nGFjJBBtG{nO+=Kw#K;9JqfhK2UUjkf$QXOG6IULjm*dv!PG&E}{b% zdd^uXHe%-bOYCI1F;dgv_0R0!dh%kJIB)J1=N`vIcdD&Q`EQg>KKGvl_q=1~7{`YU ztfQG`>&rAWNev8)e}E_trKo{mpx82AQ8YS*oo4Kv1mO#RO-kJ7Y!fa$jr0XH zcQC8ffP#(Y<9dOY!uMty_ZP)-u^#^LwFrg-gHoM%sr?@>GE$f0S~al);pNXf<4)Qi zkX1)C`YM_B(s8*m*IdUo?KvCEtvi~>CAqglo|JjOFnKL3*{dR!riJ629g@IF{^Oxy zNC|lU<#LSbFKV{=PTVm)6D8N5ml`kq4$2z(d{|i?1im<3ps$(j>GC5WYotc`YQmZ< zZS;Sxm#-mS>Gqo!3`scx>;=t4k7Zsl z%P~s6aNv&94#Y9>Q$Zs0mY$UKi)3ViL_5&XjhQW5L74IS@O8tRTOp~b_>S@ zu(r*q)81CT%8$r^u^eZ#iK1QslAUGhW0UyE)e+G@gyD^L6T;&R0JEm`0lb!dZ0sx8 z+6KOG4B@qBm1A8- zvN|ndGj%F@j>#eJx|>5Ag+XoVDyQE~%7d6B-|_kQ#~90izL+g4+HJ7WQF*)E-Usd9 zMstiE`*r)ZPtXLMmv>~Pq>%QOjZkj_@Ygt1g1=FAnlbRR}V?E9zQrUiK-!|9J z$n@3MHbE1jm@L9THP-+Ka|}+;xLfXSz~QN~o=^82%@QL;NAbh?d~5zo0W1P#v;fj% zGKA&twXb&Nbi%Si$N-Dq0yQZ@#QbYjF+%v=DK~vALweq5%|bM+0N%UKUPI;>|4`Cg zg=L9MEAtSNdoIWx>G)Glsu0gJ4QhjrRGY9}fpi#Y>WRuq1e5E?_2J?nt8qs}U*JVR zy;-Zd&#aK8POy!?Z1si2#}j}4&09}LUt#JaAk9N9h0#_vZpeE$7oeh&xFc(QfnB#` 
z6;kG4Z_mDr;T9oh?cyLTv2MnTJFDlpa%Q=7XiR+@>hQRnV$iP0IH6{QeH_!L!x(cX z!+<)cx?VvOQ4~rKdU`Q{V?E$mD8(nto#7ya4imn%xeZ!dUUsm?Pb* zAv-H|3!;zRmB~&ZBe2&5=%9e>?vVmz!sI#Ye*{kWpphXs0M8EuUCQ zlsbsVZfK*yok5>FvwB*wjfLXYykp!>sgNCV)*?4RQ-4#H6*c_0k0YI)*b@jwGtaME z6u$jc-mrDuG+y8DYg#=4Fc2B{)A)Q++pwT?9dAv*9_7m+mpm8Zz$&5Gg0d$P%of?0 z<$8TV9HA`x^P_4Tk2lE5Uy1yslT>h>@ekY!Wu_RBGDirrwE@hR_R_li>PXbtgsY3V zJ%eIl^aQaR7I0u}xDv_!E}T?V2`%6>o25zcIC(eIgWwZ$PSngib-$h}|CiKA$`>{@ z!CY^}27(XoJ6c%GTJh^9#BnTIfxw3C&+aJNcT#8+^4%}W|<0bm)s6-_fbwU#;e6fzfcD5DXiG5m-OO3DW$8eN~WSB66l8{=m z{ApYolT_D2KlO6JBqpO{qbsO;i;a+n=#|krZl*AK9kRx?hI^3lZ}A1H)!7CY`weKE zHLzalyP{NgdLN5-$Dr9?V3#^k$63e{7wsT~ibF31o@)AQQ6>!*b7Rf-iUT3`!?!9A zW~}FE6Vfjmy4MJ=8{JZO(_u8>7;? z8vXfqJ8r&sqZ$w1HYUBena8apRYtz{lY;GLOP@L{a#6@Xi4W*v9JpjrYb#y$3>~eU zc|A>c@v?TU*D9mnkL8gq*gM z&2NVRSyJSDuOD!yVl$|jdE;bI+EQlHZ*KD9Vl{g``Wkh zyZ1XNo`Upw?X0Sf?<^Az(%c=lV2>gZT-c+zT)vaoTkz<(!d6~ne7ryu?p+`Y+;^qf zV_wRsad*}TYaEqTh3N9)>qR=?TSB2OmeRi7!;fPv;u?gA1BA-gc5zjl>1AyK)i%%} zfPk7%QCX{Ui=~>)rI++D4W1vd>NrPHKIL98xOg$#6_oltLzcVYGwD6KyS;#k_tZ&LzorjfNEfMmZjU}mKrMfsXPh$WBUOnf zg=C_faS0owFGMgvs@#O&WTbO$wk4QCBWd)rIJruW8fZMgr#wQE!$-JAZ78j#+Plki zspii@&bH99+E$*wX2hM#NDKZqt!3v!JP%`Sq}iB7;JL$B$x6)($|g@JKmfi)`6yw=2b~VhUoU?E`wGW~B&kieTh3$w$egX_b%>iO5Np zHeH%YIrmT?#$e^Gi_)sxD+>v%Buer+^Qrkhfz$o8Z-gP*8I+&V5Rax^;VHHkQ&OS& zQq-}Gip0Z(OEIL+5v_d8kL*ZfIJA4%+Dk(Q+A`wu?sQYLQ}Sv*pEsNsNU58OnX!(& zu0#mQ0`cdQn-fY|px(uF_&?J2!i$6N=xYN==f>LSE=?XLw_w5Tdsix7A0IUNb*wnQ zlnRGLPqQyDMFveLm9sMgpn5V9>PO8ict0Er>JF0>>M1sq z6I+}-bGQr?hvP6hG={}>kVlWUwzL`aew%a!2Yj1EJW8SYs+BxZ&jDdVS$8zlm-_xU zS5td+S?8IaH%A#jkjo_l*))Z2XnsBJsHZ?JQ?n_OB)YVkYmMj{FbBw*#Rq?K$g$-t z_PZgKZ?8iRo}0e)hb%^`Wk&>^okmSDsPwZr8zx<~%2ta@@bZoQJha8KVLO$=z7~S! ztFJhsQe-*T0xD`xd}uozPqeBkNn()-M?{Y3By7l1VG5q+7OAZ}YDy=a+d2BO)~A5( zc1Gt0Ies^&>Rn)85HA2f#)1x8`Kx)irlc@-Gj+%EDyG@~OrVz4eqty>Ri|@bD=>ZJ z!p-71?Mc3zV(515lWPh{{iT*wNOrIH%MGn2$|x(D%%v zwG=h%XPO6?%3t0nF;8m(i>VWxrh)UFsL1YJTD;fex~Nm-eWpmgta^ZHukwwl8i*&< z=5G^Hk!xi0dvc|W-lQ<~Df{?8ByK9n8tfl0xEF}k&d z_>(}b;8wA=s)aP^Z?aWqLdgTu(0SAPY+qo8sGH_Tf5}1((JV3dQ9|FWb1g2-c>GcK zzC`@bTv&_e9+saHxQHHp%IyqT-R>CX$iWa`ml2OzDba3;FSzT+@cZ2L5F`ZDaDpqG zH0Y=U6i5grM&w<(tr98rg1T4y!D+^W4bQN~#f9jWZf>ClL&1giLbwIul>UKAU*GqM z5`h@fBoJ%^rej^0trV=H+h+csl4EKn;GzT0*B<=q%_zkXqOD@ecCb)C2ZHo6pZV>+ z$0ErdMfrlees1AeX|}hf#><6rIQtEb)ERtGxWS;!J8lQ}+in3wBRNcb0)duhBX#<_ z4`-%xp77c}&Wi2<7@Y%APn=m=XK5!v-kHuKRNPGAXU(JWe=BLsL_vsL50z@MJx($E z=-UHhAiZ{y5L~}mKp;ymeLUv0Sbwq)=FGIsh~wGG?M>m@*k#Y61KZ!4I#`o4RLX8- zey?Jzh^VK%c%{#!&lClO992B<)FsY+s2BXLNVsH#i#>u__R{rZaOOT zjX|cvp9lrZg*<1?A|Mt3)z{xwbgE;(+kn&WupASTW4-<^OzUw2IoybR61W9!=%vjD zLm>e6WRq85b#io^4`PPh7 zYtQi--VXbj67XcXTjtc2yV9RK8zxzNey-M?doXZ??}oqdx@?-zCo_jOh?_>tz;?vu z9v7gcj-ztQ)>j?Ra{L%p9k0SO6E6#qs856qwRqqov%&%`&xz)H?LFI9X0UUsrzZoM z5Wl&o)86%VUC=wKQKfp^2%ph>SC2tTeopx$Nqz6`NvyEFdV*H%j!eup2U8I5L=%3suMeT5&6&RJLp43^{Mzgq)R4(nlW5CjQcnZoBbp~9-stb(<|#$H`J4T1LV$qs={UEGU`C zt32q<&eA%A@#<_ot2S3({6g3yhCqx6L#~ARFf1Q;&n4Eouy^(X6TyD`I98`NcS$7! 
zPZoswet~Wyy|5MKkb)sKjQ7iD!jLluv~t{iyexHoe8SXD$#VwFm}}=avC|!G6G--q z@)_~PCM>Kugyyx$!{*364JF?%RU)yVx0TPMIQnHNP3@~Y`-KJNpI-%b^mK$%4h7ra4{$r=!_Gm?;bz+m zy@q1@PK0!E;+QNq;aQmDbU5PFI0=eN3h2p9J3~7i3B)gR9wk>^51AJ0+IC${MwH7T z!b;Yk2M!s^o%Sm_4X|h>`3fUW6qB&)8t&?a0od7}8rjdKM+Mn3lsjm&r zynCBi;GLx_KxfEic0Vzeg2E^C4Vh&m zPx8lFdgTIT8e=nIi2@OJ&eDRoGpm5C&_FX1%jDte&^=@3ekUl)rg~+up0b?DOBL>%HO6E0gez}N$^2zJ0G>)kGQAtz z16&3ZB$?sItG;xSIu0G|qs}Y-9jbNb)XS!k2D|%+M|qIMaJjV3bN#Pne_pnY7YmZm zVrCVzpZOnJ40;IanGPN(ZFu!n{5Y=&IGLV(fvjM;YyQFeDi?4~7^L}9i}@Z;cvZ@o zbe<8|cTZSPuznf=`9j$dM$3gXjD=tsiy13R_VbA1B%PI;0}v`zggE#^}Xw zwDRd_rxBeA&(awM%rgf*#*{Jki-|n?x@7nM#e~cx)z(H9CQiEsj%SZjC%OmGfc5Xm z!57S6xb?u;1;(2U$YEjatzMjp|6#s&f(1`N1+G|awdT5-a7>w#5*k;|A8+9Go8Yqmn5{CE7gn5O_fW*_O1DSCCFiIQ=jomxQ0ri;3- zZsG-Nk7e@aI0O=q{=Il>?jIqffg+1q50koLT24X50L@LVYxZK2JvU+Lp!j@LoG z_tRpozhqFN!{xc;pEQd&p^Wh=swF*>RSUK!%g0^_UC{^2DVFqfzRilni8Iw2rl`i; zFDh?a_ZDBa>4x9F)Yr4#q1*XN0(MO;pV|PUo{Ep@$+d0ey|3Aolr;VnqB8C^i7_9x z1Ii6@CWa`T?i@)>+yVl+&sJ}wE)cCn-DFqqj4%=0jM2L_?*Mh4yzkpwYW@HUdtLyD z(u8r1Yd>-u5aT|KV8d90tX-s@H0Zx5-t^8Nc)6H<8hh$vZrYtoOjtsg)*CGyr_uti zK15s1cClWZFN|=fHr=es8J`h1^d=XDh7VjL$O0%R$Pm92oH$UKU5#NZ=II!wSfG!g z#oY~w$nr?=ZM}RwEJnh()a;Hu?%_@uxFT}zpR>w!*`I5rtCHLuP(pELX{bO#0vT)l zh{$uyTbdLc)&NY8dA&GO=n(J8h%P(MmP!qAnxM`fIWBNoi@(h@n^7N~ShXdzl#3Vj zDO71ukmYj_k|A`_=>T@^bujtIx^1&;cD6>{_VXNt@T86Ga7<-OCLHv@42`+QhC1Z3ODhKHB{6Db zJ=E+u9@gT=Vt_TwgPl8!rypuo7g$%NPjkjJ`i%P0xFe3Jcs{K-3mu2?9$>fN10!x3 zb1zPN&&v170w@n8FXn#XN>Z0BchMb0U{eqglfKTtOs4!?HC1^6Gl|B%siso&a;g7n zBEIDfix(Rv_YN70)-!$`5u?E$eM`VemI+Mg0~j(h9N91CAHH5SQbJ-4-lL;F8x6DK zc@GVs40xBV@+T@Zd4vN@1{@U&<4SEr52fwu77`(?G>H`@%0)JRs($0g1Nmv8QKc_ zyw|meC6jJ*^BjBORl*jKE$yO%>RQ3eO2wU)%Ityq|YFVIc$E>M+%#<9gMpNoxsn)1KM#FRGE@n=KI5jP^ax3 zkD**b(tVN4o+D1Tu}Url;Oz1M*Mu0H3K_mmuT@NzSM{Q84CfHA_HjLpHQnsCf??!r zCE!G3sY}4SC5K#mfFY0TxTG0gli-$M#(|}kb<1f+e~Sx|qm|XQWCtU z5u)$KN7K%QBKS5Xbm_NWKvp8%U>aPnk2UF*)^iKzt813}h~k`UVesuL0;r*WFz=2{ zg=+D@T);Nh2H~Owa;?h26?pXqG8J+V*$hYbH$?~o5PQ{zHos+lYBJ^)m3kJUUi`zH z`mp2Ef!j{D(Elp6?3br9X5G1$D?|J$t#1K)f-++-Ks#ok(z@o(f7xoZ+D~^!5Ale3 z19!X7g>`7_k>1Ic?p24bl z5*}TqA+^A&Q0Htd4eGCZ#JY@g0WchJOB5l^^WIk+zc- zdB-QOv)S3K-*#w=BW2l0T$+=b#>A%~lEvpB9lx|+d{mlzb{y@HT|FnQYpCDe;9{Xt zgY9i-F)f>GC|SX5z;KbpA3(;f&m zd)vUJ3g+tpwan5Dggf`oZdTj7%1dS1I){>|1vQT0$T1w|os}{JedaCuKpo$7s1#EW zV5$W!yX?PloV(a5o~A9(1)ev)kW~wrgiU@}e*OtIfWzDPseSU3-Drd){?>Px(&x0K zO^FKY)=o_vzphb$W%~J@o7i|0agt!Is4FigTOmA=b({}5kS{`=GyUMeI0xVHX`j=r zbDgdF?2<#7Ky$)b>w43Fh3AF?bwe8BzUl@jNuG@ROjntFIo8|nxjg9c$+!h`?Ngb9s^M7R^Uju}1Tc)P^F0*m)bM2og|_KHU#B6TLd}`{ z8ukqbx!`4DgaZA~S6^I>oNW}V+*w9*F^R-Uve4Fvsi6$*g~tR%z$kRIhQ8qtF1Yh^ zFT~=Vq7i5Quln4n5`&}u4Pnt74LPxwY!+-ov=$M0l31)xD{_j=v^$5Yq5QnKpHf*o z;VZ8+HX4N!oK|F5d=-zm-QJ0)Y-yEBn%akm%H4_qE&T|n@_gvNm$@KSY6v#7-L^RvC1ryAj_1ozdmw|%HLrx zu|!}4T9w_;_~~woK8B256ns zMV^@oY`n?AvcT3eDq^6$AuD%y>ahhoUQzVhbm4W?-~B=vv*TPV ztYOj<_HjUv1WYTe8Kc4d;0g68$*pfO_84o|nV;$d#L?2+k+KeXb$j!|Xlt*o9ax-c zsK=>UEu?1Cp)%VnU&si6qo`$tq)i2%u=S|%r3(e5Ay*tAW$9<5@m_Ium*SSFiu}Em zgzVj~Y6p7oRtUGNwd5&4j&VDp+Fomotxkb9r1@f|(u1*2sY>CSFrKzU@U6k`dy}o| z@QwJhuHY-0X?uGF1s3ROf)$j#U97()wbW1=(f2GZ&3X~xQL79&;;a__Pl^;NF!b3r~{xBOt&~!$g)k&+l!vM9$FRg-^xBq~=Ec zSr^`qb|Prlj9OpFa7p)L1i9qoSoD!(AZxd$Woq^e8`D0i&VPB1Z_U00QOHvgXEAd# z!|tEK#V-X%s|X`ajzl&Vyar(hpH)Bg3KM}n1=CT>Edky16r@>1pCg{f;tz5@PPOK5Edda2~O^g+dwx2wO$oZf_X8^dDZkZ|LjX6}VNB{&@<3D}k>Jf8_-%X!h<9MOECBT0R*JZY!@ zfC5n+qxEzvl68+UIYAEpwCBsC2yf$#0)`vHPmlEc{YEethV)_#g3#mDss{6-?rR5+ zQnVW!>o}yTllwT<6Zr>TZ_SWpZjsTqLpt$&jFlpYX2!W_C%oA!EQGWje>lpvTW(3L z{OlwNe=SMP1vQ_*dwZu!x=Z~~`Tw=|U13c{+q#GdA_6K(FDgoqs`MUE0TBU_-g_?r 
zq=k+Ms5AiqkrI0Ey@e_uARxUHdJiQb1PBRtx%WBy-gD2s@Au*G60*LPl{v@!N1Jo} zBSRH*C9tlZc7R3^D!nwkyTSRfMf$6d-r;RWTGj}lGe#|#G&ZUHYCoCc8MUT5Et&xx zQyvbko8d#9QG4{E+V3@;XI@{3-}!kI(3sZ3_X3Qx^w*mFb*sQ4a9Nnun}*uH-Qt1d zV2|)X5M76|x7Z5W`IF$0>`$@b=!-^X2f08r`LP@WCq=3a^Q;q7?=iBck`J#H-R*qL zmoVc&QY6(bw|1 zgMUf`W(OsvGWP^(xX&9gIBP{1`-K|aM@y%cO8-0hkFrZ&pfbhk6rWp&KDsK|PrXM( z*{914cEO-49F$W$-;7Etw&>a=2cZ@0ZJ_y3ndw(4Q>T~Oa!_;PxIt!^-qaRoN;2r8cI=|xsBw9tBFFP6Q(gkWnLW5W$yl>IOVcCATHA2}Ms(4^An%Eo(SE;0_ZFCi&Q&R@(7!0pr zRnx(y7vGjan_0sc7dfm6&@szVQYxtejMJT^shz9pzfk(mka7a4@xP1xs1~ZEekmkt z-SpTl&;H}B-o<@AIQ`{PPcvOpbt|~nNx-f|(!341UjfqZ&ikX z@m=sm2z0?$J?5H#Fi|vt)1tY|q;0X(7%nmC3)DLm`zEiYP59V%;4z zS=TMGr!G^1JGXA?d2Luxi`q*eZG2Z-uDpq-c|+euz?2&u zbHOCHbK-yG?CtZ?qgHz;>k1i87bc3}xqyGCLuk8+_Ga~2yFM*qq2y&5hzvTnOE$21 zTW^zYvMO`BF1hl_E=Bbd?F#>U2*TGv)bq`BM}a5H&$0wTPabo1XVqWYsJJ&kxRc*E zt%)CMxyHSqIF!6lu;FQ->V6R8jcjjw<@vOzO=xLUiTeIOqd{tSg~IQV?dAIhYpw3v zFS_o$NqySGa?*p$GKw5q7?0|uZZ=7}B2%&=&D_z949t;|4IwA960>jt_xzYy zkL#L^{D0}Zddk3Hc{{YpGkepGmt}nR7rJ%?Sg3kJ9+IuH8pu=mAI&}ux;r9xeLNJg zrn?u*cXryT#dnVS*afnzU}xAI$>%&D&Sr1r2@%_qnLB)USxh~`w;gtKAB(U^zS0=% zcaCO?j6g>ccKN;T4|85B`1uX@M@4(Gu&7Xv^-bl1hXaflF|t7A^$XU`7w%tHNim39 z6L#x6nElWMevZ18kFMDZ&5ISKl3YGUt`RDv_5M>5K+F?Q*&%4wLj;i)VXSS ziindw_2ffXKUv^;q95_=1rL(vR0>BbQz`N{EM=FF`FJkX&@{;DLsEi*;^oW;bSjaw zT+wW>x0m2ci-1uJ^sf9tMU1_4tu(BeasK3YUnVi;qxo5|UzU0JujMH1HJV7aS4;!c zYa#FXrgm;ev;C5cduFIx2S!-i{a8S|kSC7tUtUysyjUD3;-fv}TEakcoe<&vHnwe{_QvImpjar*MpC3dZPes}oVYT0Eu zxIB|#+5T<)GS)m*D6tkNNV zy@g3apNi8WL$=xGnd_O#zFsQ_#B@e@O$P|zSyS5_?qz?UzC~tb91QHi-0eUa!6ZKG@YMT-~Z}g=h4gD0H3qpQ7EetKZpQ2mg)ZO z1;BSz3j8jPlLjD{Cl(5$wmYeMu7Xc%bsP2(?t|eO@4OfRUQYgPhzV5p6C0g4Jr?2r zBL(`q(N%5*HhH)a{=?&xMA?Be%JB7tpxe#j^sbv?^xY0xkd4=q<$M9aUXgDO&c2gH z?}Zs%mz=K?4Wtm^BPdR*7?zhK85-2%pZ}Gwm067TNIWc?>ihDVE)OeU52OGd+R8Je zXSAfFbl*ZC0`9!9W2FlC_4=@WNLbKmK7&M)&Gn^T32?yp(7pw%?NRidas~m3L<|^rdjk0^Lx=!jPw6W{n{K)kmq)`t17CVj%OrUhIhEBd3ZIJh7^8;? 
zK*JX!3pA@&d-o1sc$%ESij5G;Ol!I&n!DQN0##l`Q+e`}*H6Y{M{G(L(Rs*uh1K4z z!Rm({TEdQ?dSA`S*tY@*kzlU6ZdIg`ioeB0YhXYi=n|q@# zQWq5GzXu=O!Squ8O7`=EcpCgmip(_X2O98^#;woDSq%WQ!j*vmPfZLp8V3_ew$|cUBK1x zqRZm|I_54aE>Z6}F_eDa@Cy$)@ub>8)5^H_MUTC_1@T|K$_Dhx_Vejtds;`$)#sH3 zpqQhXU;N9go1(geKR+AnJ^A_AX}6Oca@HLgH;sRU5##A^Zdi$)53s&V5>vm#2RpLk z_aZ%5UePpdZeK0o4ewaPKWlLIn{(my-|6TU+XP{kD+!K|a z9T8kmNzJ%+3K(W}dw?XE>zrQ4h3(yv>+9R3#A|Im3eS3yMKFK0HBIqP&7P)!t4nY^ zbivL)-mj)3$h3mrCk@V%hwp6}*W$nZJoe1yyE?y9Yyo))IFD9O!CrIP@GQG>PoYQd zJc59KuyRh5TVVC|3B_$ShD00==0(0&M1s3m&cmF5XFMaXEq;(`k$fA#(EHsQoqEM| z!+q7WereYS8ltS5(VtsY@MR=2f-2)CH_2IiuK;;1kc9YVioWiBNwSY$eTT&}v3apY z8bbpHqf;K`p&{c|K{(8$2js!u;hZ!PqOa{2$``M)S#{80c;t}ZuRYG)76Hb!;2i9@ z4rWB|i$9rNs6OLZT`0L~)8N(O%p__yzeFVurmHQE`BXOW32@^K2DyR~9hIkQh=7dc zw~jqFd9mjaz_9uM!wvHvD{O!N)E>6gC4Wj~vG}L-ij3F3q1TL6QsXrWT7a#V*I*GN zJvo>vE?9p4+JuIrX+q6`7}8r*=($Hf&Sgt-y#N+X)I#+|2JT&(!U8XKJ(QViII_s5 z*2pKRb+{qhWOM^qXXTNbM(V_fsBV4diw1WY!D|!$63Wyi84=Otr4eP8AQ2INz+W4S zOe3FWkx!_gR(eIObl)t#`}&sJhc2#H^F;F6*ED5i?>=Q{XS<~?D?sY;+=Yz%W|+L3 z94#5iy$50sBT61V|8wi@ozRaTrDdOsR0;a~h>3=)V&U}%Ud<3JvOawi*90luY|-*) zDr#!-jS?jWiCx!>Y*&S`ve{q!qRijOJ4?1nf3n&WWl-9Zz^&8I=ulGm1I$`Xquc7& z=m(H!zMY{r2R?(+Ih*&Jc6VZ%e&y33Ya$NsDI~P)s^h{>?i~P>6Lx>xNkRbr9D49B zb@%Cd<7Rri)j(q3Mhe7u%6KSQP|?^IDXbTW$5{QzkgKvCd8k)qsl+8MoQ8V0sYk^s z)!;lc@?3Mj@%?7lV!uDKcB)KXDO-bAy$1xXAB&uLfDk~ZU)%6V2<6x} z@#l-kn%^Z~czn|_r+d}I{c{MyNs|~~%VFnlP_4(cR}5P?{EQ8ZHy;+@<47fux;;^> zL!TEBRNi`SkMwfObXrX^U}k zJz$h$K11>IlW19Ej1LZ3KQJ6{A~@qdrm~JhTn0F~E`EM@Huio$tyv&<`6AAGp9Tu@>)ozc!8;S4_UkYVKqcsfJPU;OZbgXEV#^{=SkxVSS^iFHRM{1YP-W(HdI64fwbRrBgm)7wK< z;En-+RrXPrC>K?t*fy{qbd~BWlXHVyBA*dg)7XAV^-vt_Vhtqxu;Y0vGf)Opi;$N- zl6X8rm0*MnLF(S**tv6a9)aSl?io>xQxjkR#z!cIaMrTBeOF;<+aw(+1WMrKxc^lLPQ@UQbF$S2+12diB380U^{htM zRdAkI-3vXfZke_7+dfT<0+ur((7vOG+p75Lno~OP)}i`GRmC*VlNFD9!7Y*vtr|7} z#qdFwHCyg3*Aj%wA|9Ee_)HqaLa(#1zMxt>B*~3O1M@V!H#O|X#ywyuFrt1o|fpqa4#Oq%N+0hQgGMr!#_%~US|h>91! 
ze0C+cWk+l744|IgYem!;UlzFKde5yw-qrwc43)3skJ^zg_ndu2)j1%Sv4@n$*(Qj* z{+6v}1+w}(c1vAd$HcowOL9EtJ+IeKMV$ z7ede$KMr{YZ35OeT|%p}L~6HkNhyd@knNl`3^%jENfPQ8Imo-#r@KavZ$EXgtm zTE7{L)-#?p8K30Ut+-!@*v`-gTjwq}h5T8Rw%x{8#;VdzspYM@S9=Gz9nj%;=A&B; z7iJ3T-lpvvYDOzL_p*3>>x&14cVHKNA+~MD;rs)eM&CXM0#N+SLO_CK;dL;(VsW3j zRXSROLH;NITt}sz_|<1oziWEvs!<1e1%{Mijwm)D+ zZ2~N5%9-5}TyL6zVR4D+TYY63p3#XR-qP0cw?JpJphP!pR~awL7oI@`Xp%lT3^U6cJ3c9*c?Jc@JG#eYP_7eX6x&=3%0v zeJeh=C68(s$#ZO~JTka@B%5DnI3&h-!jh`BZYO(kD1>FZR_t6xVR$ZS9j!g5MUrGA z$@`&IEA~OT&&0ESyZkiTXmKe6bbUY>e(4i?wD$`Hcb@-ir_WxqqBRG)4JT7M4&@uU zc#t>p1#fHr67I*a!Emiwk;-TlM$KwOK$8kThPL&ojbY3?(&EF9xoJ)c4b8EF^8^&s z4NM6=L)Fh1KBUwYf6i!eWTqL^U@vCwm-RsH*UdRAcaP>Hx1q+!ntZpoR(g`AOt>vV zQ!)!b-)!5aIgNQtav;5%1+$V|eec)y&g?5b$m(kzi$%((&C$0psp zcnv&y*{5v#*qjD4b)`}q>~ehM_)LQ=dgnrRa;5O5#I1V7h=pN8zoja_n@m*LOvW6p z*qN)k|L(eJ$a_l3jqeuP6r!p=HNllrGZ%02#H58GaC+b5<9W%1uFhH>ctxSihK*v{ z+ea$SegR0S;o`J94QrHsnru2|i>~B1srP5UHZ0piC1AP0Z9#c>j-B!0^XQYV0yKVf zuGUrxzwinK>*Z4A#gaq&F{C?I(wGKdm%UGJ_% zq-LJjrNUl|le*qcRKmq~#R0)hC(myxLV?O-Ll%IX(*)vmHm`q!!)?L!0ePOolGS8U z=l-HIzg6ehhQSf&s`@GG$Mxc1^*Sj(Z+BUzZO6*DV~S}o`mOGf)TAk#K?AFbc0%_Wb{T`J zsL9Lcw>VnquBB+$D5VMT=buB(HaB~`y5kf+e_Az?&iFu z`<;DN6iK(CzdbfNRYQH&;@4J1xVNBXLGW>0Yf)uH95B9PK@RWcZz>HrhHhv4?qLv=MY%a;4}Ud zu3GGLNboG3^1=EKjsBdGAAEtIu=9bz`kB4ZBc6A|KJQ$hdE)3FpQSL{8(c&&pt0RP z25U&=Z;dlnQ5I*mg#LF{!c1%n>gxmcQ-8s2go;zMAqRy12&106gZzjY5KFN&^NFP3 zj~L|nfbVM(PJ+Wl2J(+Swa%n|A`8BAd;09p5yTc`(&{&{3fKE`z<(#;PeD91(CkzE zgCt*(qV5Z~Xs{Gg#U177G|g5!EIRvm>oW*r2d2Nd z_*hr@j#pPF#|~qk+GNTMax7BS7%Z&=aBS-DSyQa6oV)R+BVy@m8P4a{Dk>VJYIqLV zYOP$WERo^xz8@}|sNn&^X~23pVgnMPNjPV=;r452>&W*XFyt0}?jGlaADiPI)F?lq zwYGYRFI6*WIOsAKp@@-Wq=;hceD^En z)M+~9Q=k;a!}3hcuU?Wx1(@!TMXG0BWK5BU5VG#pkBy)XtrW>38G8}Skh{hZm%`?6 z$7<^u(wO1hwRH@EBcJuJU?&E;@}tJOs_W#5rGJ!+EF`t^SbeJ>t1F~?=v3U`M)PZI z@zm4VNzx~>4)drf>b%C}{&92R1greOvHvfu)^3E7KtZ4;h?jeRc&@L6mKHuFv4tJt zxKh+S!_VJ5HGu=oynuO4Zce9^kbCi6w(P|mOidEB91hAi)i*&2QJwy+BYu1JCN60I zfjP1W2gL;(g;O(+Gq=0`qM6U!CFw#DR5RDhC%%i zDME`oRWI!8>h7H|Dzxbm<5;n)74cxOud$z@56L_9`SBIv=?<~ z=RYG&Ifc>+2n?0FgapA^cda@CkR9!FHnl|SwoAy$H4wprvEFV{==D>&u6oO;NWY*Yb zs$C$0og|b5Xxh${?6dQ-fP{Q%Sq4&@-oJ9laP)pO&`aE}U0q>q|`c;>*wO z!v}vQs}oSNj2|TRZh|h%*Yr6YhXwU#`Tud$GlWetG1v=wRE`ji>+2z{fxh^QMw{55 zoPFTvw0ibdzbJ?Emoct&h~3PiY)J_?m0qevU{%KK0(Acws?#O%V#gukbz#9l_Qzt!?K2mLx^3`0y&}Pk`XS0P_<(vaO-khA-54COv3Uu2Ts9Z*#9MgTyk;|sK z6`yLdRwW04N5l<-3LPWHUVFRC@kzMUA_(R)ku6b{g-MFpHA+`q;wPegg3%OYJ8Yu2 zL<96#E`{HFPAgCoQ0sd4^P`U>}c!>KEe`4d{7eW9?|f=2zSEznpI$2 zS%(rqA#1{gg*vCPu@L{;F(wc2Hi=<#^t}7AFO=E5j`4G-Xx4alm0if0kL1b#*5WaL z@eH>z1V~E8dUdRS=uA@K;)R#@2QF88O&FtNVg9bpPZH4nLK38}s>t$eiJbfv%O?ZN7hMFGitzo4uEdK&9o{u=-!wy1@mKUM6h;b;{ z*)jU&cb-$ltIEAha8BMQ{`0wpQU*^P+NLYQ4q?~ZSs&qankSNy$UMAiFRA9^m3uU> zQ{bd=;9_Ucosc4%e$rF_9&!22)cr88ZLoGUU%2#ydY5~a4F5b!^^ZTB2J}L5?6`$N zM-2p9oD1inqs0^F?d-E9M<(uCVQB&n*h)d=8pePQx4hakQ8hjRZ8N5>K_2OoV^?oh zgIyI7^?tCihFKjvq7FU3=%n zND@=jCgv$qR__*NR*qClprJHW93|GfxF**qXayk8uMOod*@v*LxwF_G^)*LcaJOyb z+4qC&fbi_Th5lzsQ#Azbx}=dgP6&m^n^m-q|7wp;Vt&|JWo(ZQQ+jR}!BVMS5>42d z5e>CiyKLNK%hmjY9oeF9$Lc_oRYtTs2h0T}%>pAGv=yDQC17JBbI9n>Zs&5dC0h6k|p`c{VTnPOsO&b?}=tkJ!a|iV`PJ@9jnN#DU0#5q>s3W z+$2%E(E}hjSVkX0MYC<@i44(CjcOcAso@2M(fWx5kAAzkaf;YFEyG6gH9Lr(WX#F! 
z+^w$C%evY8Qf54+&gq`79@SW5QpJ&Y&}Fi1G_v_CAD=^W%AEZChBx`$^ECvG-htsF*uh zdbF1N6%`2Sbo_l8eq?!K45DvqHAh-)RJgP)VHf$Sp5NHSB12^4xX^};_h9Q($bOE; zSTY)LzHdvkS;0*ki!PCVN440bcSj8}_Fo>&grt8=8noW9`&Qpwd zF14l1CxTt7gqgAO{!n>%#LHv8CMr+NksQmZLYMn3yRt?3zLl*?vlokyk1W!s?;0@{lNH%x!aKJ7T%g& zSxq)b`Gv0;FGPAr>^J?-XDC-+w`lw}Iov)Eeijo0pmSt%(eO2qtN-|2r7E)0FhwM6 z-J-XF^1+CZ-t6C)ef8^v`d@G2+`4;I@`dwFD-REC9@ni$YQ*LI#*6_c{`qP!Tu;lj zavNiJc*MuHhG`wweWEpU92OHsQ1; z9|_S>b~_?g9phA!Z-Ja|6)tO3?B|$WPtPE>|1yzi+e^VOglD32;?wr9NvUiaZ6tVB zXYee*!Cx7K8Emq4A8omi6^jX#2ZSgfxq(4d;Xb!vMLAr1J$bCXp({g`qOO`vz5M*R zcy~%wlxSz^$9gyRpWqwxQ&Zz=kmw?oXZaBjnX<*T@st94w7Y;~)l{`;ViGHF>&Er~ zU?QP_Z$U=XHxeW=Tw7#(G=A;nEpm?R2F+UI(E@h5+JKL_H?u#a18yyh5XLe4Dm}TU zvXfmDdEmRK`aP@kBi)3x=PZGKj3;JRp|(^;e0QBE;>pWJK$2OEx3XwRqR%!w%oN+u z9f8)yEwG>-qeq%SK!>$ft9F|ZFx+B~ERv(7jdNR`qp#NO3JUr-7t5CZOb=5anc+yp zmZ0o`^=ju$qZ%FT=5^7P!MLl&jiI8Lf^b42nQ!&n-!w6{j*Ar1J#fly z*uVcZ34Vl>vU@hqPcqw?_0Bn0%>bzjo04Ji*GcX<61_K0n&UdSm2(Egy)C~9i4i_{ zv^fw}-J!(nA0R_*lF0Y^JHFWKw}sir-eBeBXZVrFl*%S_nk`Tovzq!SaCPpr_q^DH zGPX1M+J>|VSG#dFr_U173SJz&xptm@U;m=iViORl`Xa3l>5rkL+@{|!a9bvCKdO*& zFJ^+Hw}#|IVq?XnL<*m*Gs=PVzOcp>)aP>49CQta0pcvWa21)rSpoWQr`?$3f2R22w((7%e`AvA|#fF;lv1?R<2o!Y#`M{+Y8T&NkDcF|0%nC z_~$zGqYth8r(&hYb}ER1d4*-;ur2718YQ1Isg%#zM=77DI90P<_;o-4_|h`cA<58b zSWcgFT-$A1VX48WuUl(cbZg^ky$t91qA&5$tiJLe`wAl72k=W@xY;i8n7W)e`r(jl@mA>Xu38LEy1DNx6#=811epBh!>58xyL<7+u*TAZ)Mc|Wo)UnzXgZC0quT6UGr4axVpk9clSxviGY0S zTEWLV5fz7++$!v|q+ct!1M7(qF$pRnhWS5d?<%`HS`Y88p3R^wFKPJV3I4cWPGD(c z(vpe#&170BGFn;+W4TXByul6}s&Vhj8w_Ml(|IjBr>KYOp1Scf2iuM1g;IQuBB5Yq zic5jCHmcXueS{sSnGC=P5-(aMcGyufcrSb9bjx=)iKkP{$FIICc^CxO>&}`RE^-Xa zdz1Wd4$|gd`XvMl8u%5L}et_XD%8E|1*f z2j-q68$*FcG65I4Ro;DWX9uQU(}$)>^kw=a$0K@z z7Cn)-rH8vzC_RT+Z|`Lx5#=!#`*Ew5hZ(gf&O`yXhfB!8TCmi+FsER6AGvh*&s*(_ zKV`Sg%ZuhR5SKzEZ9I3H6)JTBsSIT+S{ z%Do>~43c^1>u>r!)qnQN%-!c@w`@POZ@mZgBdOoSvr|tisj{5Ka=g2h>qt@|YE@jh zpFrn32KPwG(VIXHcTt0RR(oTTfZmw0KwrJv=A&59uCszarf)=%jTi2NdEFTv$V6@m zv>dKp(PMEv*AnnJcVu`!pYFERDyf;O;5*T4UDty?HvkP_`QYr+6*XoRuGVF{yPEr$I*@EPlJ4utw7KuzQOVYE&@k#j|*ttzKmf(_*tO zxlTOJb1T?tK<8I}t9#>?B&|nRoU>uk7a)n{5{^puAWvo5({JIJu5G{{s;r?PfgC5h zJ|2SiZxKu1&oF^7VWJ$?M+%1rHa{$U$*(l1Bom5&-SySDkl3*%uM}M{Y>x8iA_!T( zjNH3!8dzmvFp;WL=J$e168RcxKgo^C9Y%6xQ!%P*-Q6-GB2K#8;74UU?8kdi8JPq? 
z%l1E*WD~txXD6nJICIJS_JKyX!uB(eRo_FQ=0gQee)_M)vn|vu-C;Ktz1?sW-Mdy2 zae1HVChPsSR@0oDhRr6Th2ea+0j0TqD`4X_U?#cuJMV?zpBN@+zP`VwmRlq#YIP}K zmB$R;4?Co2X*(~E^?M*8in_Z-zQx!5qSa6SG#91Z6QHl)h^1ni$7}*gs{g%g(`u6A zV)Jv4N)OVLB!@unZ;mY;8P@T4sEK)LyqDL{P|4-SkI}R|RBeoTCe&RXU|?0SQ?9p# ze=gpw)J7$d)S;6SWyyl^3;n3#4rDt zH%b0pEPM!DKcUjKX(k>>Sjvptl~tCw5zXrR$;X>Hshm0+Om$a)x16rZ85%#1V*yP2T%akdMQd3sbegpF^`xmIcQ z<%kzqrGqx_-3 z(z9OVlkolK7h$=v9$MsvfxDJxi@9E}4wiCTnV)l+LaiCG3264Hi$neqWH_O&wiV1hfZX}xe)$JW3-YTuSD{`m0}#PKeb zzP5lY_J^1t9|yth_=lYbC~;#>!umY^72xio7_L{(32OEwqZmk z`M~L!k}?r4K9T-tPTq8RSGg;HI8PPEYXE7CH9Q@Bp=;RiEbG~zO^cgXxoc+ z$zK_Pd6)aGZ!PSC?$^8n#*t98|2v_3(K5QB_y%C|wLClx8TQnAi!)HfEdpLyX~yRO-_5}2=a59pLKAmDe;blB)9*BBqwC`2{UN37aQVN`Up+<^sKnz@j zD(3_uWZdR652(HPt7J866aS>PHr+36zRM3M3|Dz?yD#l2r1YH`xi&%J0!ZG1!7dh=w=@JvnAp^=bBP)Jn(pM-4t>|oidc>Ip{bEf5TU!;mfu_a0S>%JJabM z8E%Z@t7W`o@XUMKUqGCaXm9{yOa$D&Vt{?plCb<75bogC_^7YZ#2LB=6>4+-f~2eap!Lw zx|ShbPvKQi+Gu@%dJ_7xketV$?%{mLd5UD-FChM8e^c%yLp=u}JG%_`mnAJ*;P*&> z6eRnN=!D~S&X2Z<#d+y%rAEN@R)|hG60?jj;>K?8g>v7;X*6)jWw74zJ{(DQ8(I(i zmTwKevysK-CGOtFVs*KI7-S!nUee#lY5L?nAbt6DH}JClaFwTMKr+0HKl{?3nxS$* zvcE*1ud2G@JB9U)ZEIqm&D|n|43FsiNdb>>m6WDd{p$mC$1*C@G}Cp{X_3`2Z67gs zohyItA~eEkbanU_zT^1{8$n3eFX#D#C2pjkdtoO9H_l%47&b2_|amrd5iub8qg;WdZ5+SS>e!z*Gi+#>Gz zAedpel`Ak=K*QPBR9~!K@Lo8i(d5a`y&Bac4otsrKOO0ACZNg>{e3v1 z_ATd+7E<~WmzkZVp^8sqn4}8YO5l^1gUjB2v#;)<3u1)B-#0DoF1_F6PfYIcbMARx z-hXVWimeZnGMGy)7~S<11q-ZPhO*iCUs9#eX=D;BcXyfxbCZqqD=+ow@sl0Q;A~vT zEEC@(pZFoww^8;V*V2Pzc3KuHR6a;1zX-uPI6ppMYzn5{U?y(7)4sU&MH1UtA==2E z-+q^*0{p}@;VIWi{>Ox{U(ydNp6UiQ34ZUw+o5hkPb>9M{qyqD{V_kgIVuQCwd-)e zBU0~Ig!&16AEUfZ8|hkl+tjR5*g5Be>=+3xzaB>n9(@fe;2qvO*1>XT;p}8(04>jN z*u<`SkhP`lXXtu;E75B~sYvoQh8SH(Ng67tB8qT_o8ETypYbe?3h7Os;)!m5MI0o9`@5_S9k>$Y#FC$|5>Wu^7Yv_j1 z4k>5A`O4p-VYqYsczaBlbc^Ogho7p6mdLSFNC@b9_(yb8pzl=AvgBQwxRtaxB8rG7 z!M9n6Zv_#N+$XwnU6$y-{CmsHELQu!U;dAqb2!>N&Fw88{rk@XS9NuBDF3e~{_lGW z$txo3H)nO|-OjSN|X53X-)a=HbaxP$2%lNBp194+@o^{kK#4d&Acsz`P4% zWB==9|MR23Ra3KAk^hi>ZJzsu!+bo<}irT>3T+F=`)vz$0R^p_+N P@TVlN`m*e~>4*OXbH`3# literal 0 HcmV?d00001 From d47c62cf11f5b3842fab7cfc125a42e23a9ec895 Mon Sep 17 00:00:00 2001 From: Vasa Date: Mon, 5 Mar 2018 18:38:15 -0800 Subject: [PATCH 09/83] feature to download thumbnail feature to select a language filter feature to add a prefix word in front of the image name --- .../google_images_download.py | 153 +++++++++++++++--- 1 file changed, 127 insertions(+), 26 deletions(-) diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py index f0113fc2..22aaf329 100644 --- a/google_images_download/google_images_download.py +++ b/google_images_download/google_images_download.py @@ -58,6 +58,10 @@ parser.add_argument('-m', '--metadata', default=False, help="Print the metadata of the image", action="store_true") parser.add_argument('-e', '--extract_metadata', default=False, help="Dumps all the logs into a text file", action="store_true") parser.add_argument('-st', '--socket_timeout', default=False, help="Connection timeout waiting for the image to download", type=float) +parser.add_argument('-th', '--thumbnail', default=False, help="Downloads image thumbnail along with the actual image", action="store_true") +parser.add_argument('-la', '--language', default=False, help="Defines the language filter. 
The search results are authomatically returned in that language", type=str, required=False, + choices=['Arabic','Chinese (Simplified)','Chinese (Traditional)','Czech','Danish','Dutch','English','Estonian','Finnish','French','German','Greek','Hebrew','Hungarian','Icelandic','Italian','Japanese','Korean','Latvian','Lithuanian','Norwegian','Portuguese','Polish','Romanian','Russian','Spanish','Swedish','Turkish']) +parser.add_argument('-pr', '--prefix', default=False, help="A word that you would want to prefix in front of each image name", type=str, required=False) args = parser.parse_args() @@ -85,7 +89,7 @@ if args.similar_images: current_time = str(datetime.datetime.now()).split('.')[0] - search_keyword = [current_time.replace(":", "-")] + search_keyword = [current_time.replace(":", "_")] # If single_image or url argument not present then keywords is mandatory argument if args.single_image is None and args.url is None and args.similar_images is None and args.keywords is None: @@ -169,29 +173,6 @@ def format_object(object): formatted_object['image_thumbnail_url'] = object['tu'] return formatted_object -#make directories -def create_directories(main_directory,dir_name): - # make a search keyword directory - try: - if not os.path.exists(main_directory): - os.makedirs(main_directory) - time.sleep(0.2) - path = str(dir_name) - sub_directory = os.path.join(main_directory, path) - if not os.path.exists(sub_directory): - os.makedirs(sub_directory) - else: - path = str(dir_name) - sub_directory = os.path.join(main_directory, path) - if not os.path.exists(sub_directory): - os.makedirs(sub_directory) - except OSError as e: - if e.errno != 17: - raise - # time.sleep might help here - pass - return - #function to download single image def single_image(): url = args.single_image @@ -272,6 +253,13 @@ def similar_images(): #Building URL parameters def build_url_parameters(): + if args.language: + lang = "&lr=" + lang_param = {"Arabic":"lang_ar","Chinese (Simplified)":"lang_zh-CN","Chinese (Traditional)":"lang_zh-TW","Czech":"lang_cs","Danish":"lang_da","Dutch":"lang_nl","English":"lang_en","Estonian":"lang_et","Finnish":"lang_fi","French":"lang_fr","German":"lang_de","Greek":"lang_el","Hebrew":"lang_iw ","Hungarian":"lang_hu","Icelandic":"lang_is","Italian":"lang_it","Japanese":"lang_ja","Korean":"lang_ko","Latvian":"lang_lv","Lithuanian":"lang_lt","Norwegian":"lang_no","Portuguese":"lang_pt","Polish":"lang_pl","Romanian":"lang_ro","Russian":"lang_ru","Spanish":"lang_es","Swedish":"lang_sv","Turkish":"lang_tr"} + lang_url = lang+lang_param[args.language] + else: + lang_url = '' + built_url = "&tbs=" counter = 0 params = {'color':[args.color,{'red':'ic:specific,isc:red', 'orange':'ic:specific,isc:orange', 'yellow':'ic:specific,isc:yellow', 'green':'ic:specific,isc:green', 'teal':'ic:specific,isc:teel', 'blue':'ic:specific,isc:blue', 'purple':'ic:specific,isc:purple', 'pink':'ic:specific,isc:pink', 'white':'ic:specific,isc:white', 'gray':'ic:specific,isc:gray', 'black':'ic:specific,isc:black', 'brown':'ic:specific,isc:brown'}], @@ -293,6 +281,7 @@ def build_url_parameters(): else: built_url = built_url + ',' + ext_param counter += 1 + built_url = lang_url+built_url return built_url #building main search URL @@ -309,6 +298,7 @@ def build_search_url(search_term,params): else: url = 'https://www.google.com/search?q=' + quote( search_term) + '&espv=2&biw=1366&bih=667&site=webhp&source=lnms&tbm=isch' + params + '&sa=X&ei=XosDVaCXD8TasATItgE&ved=0CAcQ_AUoAg' + #print(url) return url #measures the file size @@ 
-322,6 +312,105 @@ def file_size(file_path): size /= 1024.0 return size +# make directories +def create_directories(main_directory, dir_name): + dir_name_thumbnail = dir_name + " - thumbnail" + # make a search keyword directory + try: + if not os.path.exists(main_directory): + os.makedirs(main_directory) + time.sleep(0.2) + path = str(dir_name) + sub_directory = os.path.join(main_directory, path) + if not os.path.exists(sub_directory): + os.makedirs(sub_directory) + if args.thumbnail: + sub_directory_thumbnail = os.path.join(main_directory, dir_name_thumbnail) + if not os.path.exists(sub_directory_thumbnail): + os.makedirs(sub_directory_thumbnail) + else: + path = str(dir_name) + sub_directory = os.path.join(main_directory, path) + if not os.path.exists(sub_directory): + os.makedirs(sub_directory) + if args.thumbnail: + sub_directory_thumbnail = os.path.join(main_directory, dir_name_thumbnail) + if not os.path.exists(sub_directory_thumbnail): + os.makedirs(sub_directory_thumbnail) + except OSError as e: + if e.errno != 17: + raise + # time.sleep might help here + pass + return + + +# Download Images +def download_image_thumbnail(image_url, image_format, main_directory, dir_name, count): + if args.print_urls: + print("Image URL: " + image_url) + try: + req = Request(image_url, headers={ + "User-Agent": "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17"}) + try: + # timeout time to download an image + if args.socket_timeout: + timeout = float(args.socket_timeout) + else: + timeout = 15 + response = urlopen(req, None, timeout) + + # keep everything after the last '/' + image_name = str(image_url[(image_url.rfind('tbn')) + 4:]) + image_name = image_name.lower() + # if no extension then add it + # remove everything after the image name + if image_format == "": + image_name = image_name + ".jpg" + else: + image_name = image_name + "." + image_format + + #prefix name in image + if args.prefix: + prefix = args.prefix + " " + else: + prefix = '' + + path = main_directory + "/" + dir_name + " - thumbnail" + "/" + prefix + " " + str(count) + ". " + image_name + output_file = open(path, 'wb') + data = response.read() + output_file.write(data) + response.close() + + # image size parameter + if args.print_size: + print("Image Size: " + str(file_size(path))) + + download_status = 'success' + download_message = "Completed Image Thumbnail ====> " + prefix + str(count) + ". " + image_name + + except UnicodeEncodeError as e: + download_status = 'fail' + download_message = "UnicodeEncodeError on an image...trying next one..." + " Error: " + str(e) + + except HTTPError as e: # If there is any HTTPError + download_status = 'fail' + download_message = "HTTPError on an image...trying next one..." + " Error: " + str(e) + + except URLError as e: + download_status = 'fail' + download_message = "URLError on an image...trying next one..." + " Error: " + str(e) + + except ssl.CertificateError as e: + download_status = 'fail' + download_message = "CertificateError on an image...trying next one..." + " Error: " + str(e) + + except IOError as e: # If there is any IOError + download_status = 'fail' + download_message = "IOError on an image...trying next one..." 
+ " Error: " + str(e) + return download_status, download_message + + # Download Images def download_image(image_url,image_format,main_directory,dir_name,count): if args.print_urls: @@ -347,7 +436,13 @@ def download_image(image_url,image_format,main_directory,dir_name,count): else: image_name = image_name[:image_name.find(image_format) + 3] - path = main_directory + "/" + dir_name + "/" + str(count) + ". " + image_name + # prefix name in image + if args.prefix: + prefix = args.prefix + " " + else: + prefix = '' + + path = main_directory + "/" + dir_name + "/" + prefix + str(count) + ". " + image_name output_file = open(path, 'wb') data = response.read() output_file.write(data) @@ -358,7 +453,7 @@ def download_image(image_url,image_format,main_directory,dir_name,count): print("Image Size: " + str(file_size(path))) download_status = 'success' - download_message = "Completed ====> " + str(count) + ". " + image_name + download_message = "Completed Image ====> " + prefix + str(count) + ". " + image_name except UnicodeEncodeError as e: download_status = 'fail' @@ -428,6 +523,12 @@ def _get_all_items(page,main_directory,dir_name,limit): download_status,download_message = download_image(object['image_link'],object['image_format'],main_directory,dir_name,count) print(download_message) if download_status == "success": + + # download image_thumbnails + if args.thumbnail: + download_status, download_message_thumbnail = download_image_thumbnail(object['image_thumbnail_url'],object['image_format'],main_directory, dir_name, count) + print(download_message_thumbnail) + count += 1 else: errorCount += 1 From ec9c5c31a3c14a19ac85b79a81a21e325f483e3c Mon Sep 17 00:00:00 2001 From: Vasa Date: Mon, 5 Mar 2018 18:40:26 -0800 Subject: [PATCH 10/83] doc and version update --- README.rst | 102 ++++++++++++++++++++++++++++++++++++++--------------- setup.py | 4 +-- 2 files changed, 75 insertions(+), 31 deletions(-) diff --git a/README.rst b/README.rst index a1df6244..fe9e864d 100644 --- a/README.rst +++ b/README.rst @@ -25,13 +25,13 @@ You can use **one of the below methods** to download and use this repository. **Using pip:** -:: +.. code-block:: bash $ pip install google_images_download **Manually using CLI:** -:: +.. code-block:: bash $ git clone https://github.com/hardikvasa/google-images-download.git $ cd google-images-download && sudo python setup.py install @@ -45,20 +45,20 @@ Usage If installed via pip or using CLI, use the following command: -:: +.. code-block:: bash $ googleimagesdownload [Arguments...] If downloaded via the UI, unzip the file downloaded, go to the 'google_images_download' directory and use one of the below commands: -:: +.. code-block:: bash $ python3 google_images_download.py [Arguments...] OR $ python google_images_download.py [Arguments...] Arguments -~~~~~~~~~ +--------- +------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | Argument | Short hand | Description | @@ -144,6 +144,12 @@ Arguments | socket_timeout | st | Allows you to specify the time to wait for socket connection. | | | | You could specy a higher timeout time for slow internet connection. The default value is 15 seconds. | +------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| thumbnail | th | Downloads image thumbnails corresponding to each image downloaded. 
Thumbnails are saved in their own sub-directories. | ++------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| language | la | Defines the language filter. The search results are authomatically returned in that language | ++------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| prefix | pr | A word that you would want to prefix in front of actual image name. This feature can be used for image identification purpose.| ++------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | help | h | show the help message regarding the usage of the above arguments | +------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ @@ -154,66 +160,96 @@ Examples - Simple examples -``googleimagesdownload --keywords "Polar bears, baloons, Beaches" --limit 20`` +.. code-block:: bash + + $ googleimagesdownload --keywords "Polar bears, baloons, Beaches" --limit 20 - Using Suffix Keywords allows you to specify words after the main keywords. For example if the ``keyword = car`` and ``suffix keyword = 'red,blue'`` then it will first search for ``car red`` and then ``car blue`` -``googleimagesdownload --k "car" -sk 'red,blue,white' -l 10`` +.. code-block:: bash + + $ googleimagesdownload --k "car" -sk 'red,blue,white' -l 10 - To use the short hand command -``googleimagesdownload -k "Polar bears, baloons, Beaches" -l 20`` +.. code-block:: bash + + $ googleimagesdownload -k "Polar bears, baloons, Beaches" -l 20 - To download images with specific image extension/format -``googleimagesdownload --keywords "logo" --format svg`` +.. code-block:: bash + + $ googleimagesdownload --keywords "logo" --format svg - To use color filters for the images -``googleimagesdownload -k "playground" -l 20 -c red`` +.. code-block:: bash + + $ googleimagesdownload -k "playground" -l 20 -c red - To use non-English keywords for image search -``googleimagesdownload -k "北极熊" -l 5`` +.. code-block:: bash + + $ googleimagesdownload -k "北极熊" -l 5 - To download images from the google images link -``googleimagesdownload -k "sample" -u `` +.. code-block:: bash + + $ googleimagesdownload -k "sample" -u - To save images in specific main directory (instead of in 'downloads') -``googleimagesdownload -k "boat" -o "boat_new"`` +.. code-block:: bash + + $ googleimagesdownload -k "boat" -o "boat_new" - To download one single image with the image URL -``googleimagesdownload --keywords "baloons" --single_image `` +.. code-block:: bash + + $ googleimagesdownload --keywords "baloons" --single_image - To download images with size and type constrains -``googleimagesdownload --keywords "baloons" --size medium --type animated`` +.. code-block:: bash + + $ googleimagesdownload --keywords "baloons" --size medium --type animated - To download images with specific usage rights -``googleimagesdownload --keywords "universe" --usage_rights labled-for-reuse`` +.. code-block:: bash + + $ googleimagesdownload --keywords "universe" --usage_rights labled-for-reuse - To download images with specific color type -``googleimagesdownload --keywords "flowers" --color_type black-and-white`` +.. 
code-block:: bash + + $ googleimagesdownload --keywords "flowers" --color_type black-and-white - To download images with specific aspect ratio -``googleimagesdownload --keywords "universe" --aspect_ratio panoramic`` +.. code-block:: bash + + $ googleimagesdownload --keywords "universe" --aspect_ratio panoramic - To download images which are similar to the image in the image URL that you provided (Reverse Image search). -``googleimagesdownload -si -l 10`` +.. code-block:: bash + + $ googleimagesdownload -si -l 10 - To download images from specific website or domain name for a given keyword -``googleimagesdownload --keywords "universe" --specific_site example.com`` +.. code-block:: bash + + $ googleimagesdownload --keywords "universe" --specific_site example.com ===> The images would be downloaded in their own sub-directories inside the main directory (either the one you provided or in 'downloads') in the same folder you are in. @@ -221,7 +257,7 @@ Examples -------------- Troubleshooting ----------- +--------------- **## SSL Errors** @@ -234,18 +270,24 @@ and run the file. While using the above commands, if you get ``Error: -bash: googleimagesdownload: command not found`` then you have to set the correct path variable. To get the details of the repo, run the following command: -:: - $ pip show -f google_images_download + +.. code-block:: bash + + $ pip show -f google_images_download you will get the result like this: -:: + +.. code-block:: bash + Location: /Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages Files: ../../../bin/googleimagesdownload together they make: ``/Library/Frameworks/Python.framework/Versions/2.7/bin`` which you need add it to the path: -:: - $ export PATH="/Library/Frameworks/Python.framework/Versions/2.7/bin + +.. code-block:: bash + + $ export PATH="/Library/Frameworks/Python.framework/Versions/2.7/bin" **## [Errno 13] Permission denied creating directory 'downloads'** @@ -256,8 +298,10 @@ When you run the command, it downloads the images in the current directory (the **## Permission denied while installing the library** On MAC and Linux, when you get permission denied when installing the library using pip, try doing a user install. -:: - $ pip install google_images_download --user + +.. code-block:: bash + + $ pip install google_images_download --user You can also run pip install as a superuser with ``sudo pip install google_images_download`` but it is not generally a good idea because it can cause issues with your system-level packages. @@ -266,7 +310,7 @@ Structure Below diagram represents the code logic. -.. figure:: images/flow-chart.png +.. 
figure:: http://www.zseries.in/flow-chart.png :alt: Contribute diff --git a/setup.py b/setup.py index 65211e08..29f9b083 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from codecs import open from os import path -__version__ = '1.0.2' +__version__ = '1.0.5' here = path.abspath(path.dirname(__file__)) @@ -35,7 +35,7 @@ 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', ], - keywords='', + keywords='google images download save filter color image-search image-dataset image-scrapper image-gallery terminal command-line', packages=find_packages(exclude=['docs', 'tests*']), include_package_data=True, author='Hardik Vasa', From 1fb6745ad06f7d262a55209f3906d82b65e7898f Mon Sep 17 00:00:00 2001 From: Hardik Vasa Date: Mon, 5 Mar 2018 20:59:58 -0800 Subject: [PATCH 11/83] Update README.rst table of contents --- README.rst | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/README.rst b/README.rst index fe9e864d..e1018329 100644 --- a/README.rst +++ b/README.rst @@ -1,47 +1,55 @@ Google Images Download -====================== +###################### Python Script for 'searching' and 'downloading' hundreds of Google images to the local hard disk! +Contents + +.. contents:: :local: + Summary -------- +======= This is a command line python program to search keywords/key-phrases on Google Images and then also optionally download one or more images to your computer. This is a small program which is ready-to-run, but still under development. Many more features will be added to it going forward. + Compatability -------------- +============= This program is compatible with both the versions of python - 2.x and 3.x (recommended). It is a download-and-run program with no changes to the file. You will just have to specify parameters through the command line. Installation ------------- +============ You can use **one of the below methods** to download and use this repository. -**Using pip:** +Using pip +--------- .. code-block:: bash $ pip install google_images_download -**Manually using CLI:** +Manually using CLI +------------------ .. code-block:: bash $ git clone https://github.com/hardikvasa/google-images-download.git $ cd google-images-download && sudo python setup.py install -**Manually using UI:** +Manually using UI +----------------- Go to the `repo on github `__ ==> Click on 'Clone or Download' ==> Click on 'Download ZIP' and save it on your local disk. Usage ------ +===== If installed via pip or using CLI, use the following command: @@ -58,7 +66,7 @@ If downloaded via the UI, unzip the file downloaded, go to the 'google_images_do $ python google_images_download.py [Arguments...] Arguments ---------- +========= +------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | Argument | Short hand | Description | @@ -156,7 +164,7 @@ Arguments **Note:** If ``single_image`` or ``url`` parameter is not present, then keywords is a mandatory parameter. No other parameters are mandatory. 
Examples
---------
+========

 - Simple examples

@@ -257,7 +265,7 @@ Examples
 --------------

 Troubleshooting
----------------
+===============

 **## SSL Errors**

@@ -306,7 +314,7 @@ On MAC and Linux, when you get permission denied when installing the library usi

 You can also run pip install as a superuser with ``sudo pip install google_images_download`` but it is not generally a good idea because it can cause issues with your system-level packages.

 Structure
----------
+=========

 Below diagram represents the code logic.

@@ -314,7 +322,7 @@ Below diagram represents the code logic.
 :alt:

 Contribute
-----------
+==========

 Anyone is welcome to contribute to this script.
 If you would like to make a change, open a pull request.
@@ -324,7 +332,7 @@ For issues and discussion visit the
 The aim of this repo is to keep it simple, stand-alone, backward compatible and 3rd party dependency proof.

 Disclaimer
-----------
+==========

 This program lets you download tons of images from Google. Please do not download or use any image in violation of its copyright terms.

From f6398182c1468e1acb2bea7f2140b84c94bd9120 Mon Sep 17 00:00:00 2001
From: Vasa
Date: Sat, 10 Mar 2018 12:45:00 -0800
Subject: [PATCH 12/83] file support for keywords import per #47

---
 .../google_images_download.py | 30 ++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py
index 22aaf329..4a1ac3b1 100644
--- a/google_images_download/google_images_download.py
+++ b/google_images_download/google_images_download.py
@@ -25,10 +25,12 @@
 import datetime
 import json
 import re
+import codecs

 # Taking command line arguments from users
 parser = argparse.ArgumentParser()
 parser.add_argument('-k', '--keywords', help='delimited list input', type=str, required=False)
+parser.add_argument('-kf', '--keywords_from_file', help='extract list of keywords from a text file', type=str, required=False)
 parser.add_argument('-sk', '--suffix_keywords', help='comma separated additional words added to main keyword', type=str, required=False)
 parser.add_argument('-l', '--limit', help='delimited list input', type=str, required=False)
 parser.add_argument('-f', '--format', help='download images with specific format', type=str, required=False,
@@ -69,6 +71,32 @@
 if args.keywords:
     search_keyword = [str(item) for item in args.keywords.split(',')]

+#Initialization and Validation of user arguments
+if args.keywords_from_file:
+    search_keyword = []
+    file_name = args.keywords_from_file
+    with codecs.open(file_name, 'r', encoding='utf-8-sig') as f:
+        if '.csv' in file_name:
+            for line in f:
+                if line in ['\n', '\r\n']:
+                    pass
+                else:
+                    search_keyword.append(line.replace('\n', '').replace('\r', ''))
+                    # print(line)
+            print(search_keyword)
+        elif '.txt' in file_name:
+            for line in f:
+                if line in ['\n', '\r\n']:
+                    pass
+                else:
+                    # print line
+                    search_keyword.append(line.replace('\n', ''))
+            print(search_keyword)
+        else:
+            print("Invalid file type: Valid file types are either .txt or .csv \n"
+                  "exiting...")
+            sys.exit()
+
 #Additional words added to keywords
 if args.suffix_keywords:
     suffix_keywords = [" " + str(sk) for sk in args.suffix_keywords.split(',')]
@@ -92,7 +120,7 @@
     search_keyword = [current_time.replace(":", "_")]

 # If single_image or url argument not present then keywords is mandatory argument
-if args.single_image is None and args.url is None
and args.similar_images is None and args.keywords is None and args.keywords_from_file is None: parser.error('Keywords is a required argument!') # If this argument is present, set the custom output directory From f896b2b77ea397c71d97c0dc81531ce64f6e3fd6 Mon Sep 17 00:00:00 2001 From: Vasa Date: Sat, 10 Mar 2018 14:29:38 -0800 Subject: [PATCH 13/83] #47 and other minor edits --- README.rst | 211 +++++++++++++++++++++++++++++------------------------ setup.py | 2 +- 2 files changed, 117 insertions(+), 96 deletions(-) diff --git a/README.rst b/README.rst index e1018329..befdac05 100644 --- a/README.rst +++ b/README.rst @@ -29,14 +29,12 @@ Installation You can use **one of the below methods** to download and use this repository. Using pip ---------- .. code-block:: bash $ pip install google_images_download Manually using CLI ------------------- .. code-block:: bash @@ -44,7 +42,6 @@ Manually using CLI $ cd google-images-download && sudo python setup.py install Manually using UI ------------------ Go to the `repo on github `__ ==> Click on 'Clone or Download' ==> Click on 'Download ZIP' and save it on your local disk. @@ -68,98 +65,122 @@ If downloaded via the UI, unzip the file downloaded, go to the 'google_images_do Arguments ========= -+------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| Argument | Short hand | Description | -+==================+=============+===============================================================================================================================+ -| keywords | k | Denotes the keywords/key phrases you want to search for and the directory file name. | -| | | | -| | | Tips: | -| | | | -| | | * If you simply type the keyword, Google will best try to match it | -| | | * If you want to search for exact phrase, you can wrap the keywords in double quotes ("") | -| | | * If you want to search to contain either of the words provided, use **OR** between the words. | -| | | * If you want to explicitly not want a specific word use a minus sign before the word (-) | -+------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| suffix_keywords | sk | Denotes additional words added after main keyword while making the search query. | -| | | | -| | | Useful when you have multiple suffix keywords for one keyword. | -| | | | -| | | The final search query would be: | -+------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| limit | l | Denotes number of images that you want to download. | -+------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| format | f | Denotes the format/extension that you want to download. | -| | | | -| | | `Possible values: jpg, gif, png, bmp, svg, webp, ico` | -+------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| color | c | Denotes the color filter that you want to apply to the images. 
| -| | | | -| | | `Possible values`: | -| | | | -| | | `red, orange, yellow, green, teal, blue, purple, pink, white, gray, black, brown` | -+------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| color_type | ct | Denotes the color type you want to apply to the images. | -| | | | -| | | `Possible values: full-color, black-and-white, transparent` | -+------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| usage_rights | r | Denotes the usage rights/licence under which the image is classified. | -| | | | -| | | `Possible values:` | -| | | | -| | | * `labled-for-reuse-with-modifications`, | -| | | * `labled-for-reuse`, | -| | | * `labled-for-noncommercial-reuse-with-modification`, | -| | | * `labled-for-nocommercial-reuse` | -+------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| size | s | Denotes the relative size of the image to be downloaded. | -| | | | -| | | `Possible values: large, medium, icon` | -+------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| aspect_ratio | a | Denotes the aspect ration of images to download. | -| | | | -| | | `Possible values: tall, square, wide, panoramic` | -+------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| type | t | Denotes the type of image to be downloaded. | -| | | | -| | | `Possible values: face,photo,clip-art,line-drawing,animated` | -+------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| time | w | Denotes the time the image was uploaded/indexed. | -| | | | -| | | `Possible values: past-24-hours, past-7-days` | -+------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| delay | d | Time to wait between downloading two images | -+------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| url | u | Allows you search by image. It downloads images from the google images link provided | -+------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| single_image | x | Allows you to download one image if the complete URL of the image is provided | -+------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| output_directory | o | Allows you specify the main directory name. If not specified, it will default to 'downloads' | -+------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| similar_images | si | Reverse Image Search. | -| | | | -| | | Searches and downloads images that are similar to the image link/url you provide. 
| -+------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| specific_site | ss | Allows you to download images with keywords only from a specific website/domain name you mention as indexed in Google Images. | -+------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| print_urls | p | Print the URLs of the imageson the console. These image URLs can be used for debugging purposes | -+------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| print_size | ps | Prints the size of the image on the console | -+------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| metadata | m | Prints the metada of the image. This includes image size, origin, image attributes, description, image URL, etc. | -+------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| extract_metadata | e | Metadata of all the downloaded images is stored in a text file. This file can be found in the ``logs/`` directory | -+------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| socket_timeout | st | Allows you to specify the time to wait for socket connection. | -| | | You could specy a higher timeout time for slow internet connection. The default value is 15 seconds. | -+------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| thumbnail | th | Downloads image thumbnails corresponding to each image downloaded. Thumbnails are saved in their own sub-directories. | -+------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| language | la | Defines the language filter. The search results are authomatically returned in that language | -+------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| prefix | pr | A word that you would want to prefix in front of actual image name. This feature can be used for image identification purpose.| -+------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| help | h | show the help message regarding the usage of the above arguments | -+------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| Argument | Short hand | Description | ++===================+=============+===============================================================================================================================+ +| keywords | k | Denotes the keywords/key phrases you want to search for. 
For more than one keywords, wrap it in single quotes. | +| | | | +| | | Tips: | +| | | | +| | | * If you simply type the keyword, Google will best try to match it | +| | | * If you want to search for exact phrase, you can wrap the keywords in double quotes ("") | +| | | * If you want to search to contain either of the words provided, use **OR** between the words. | +| | | * If you want to explicitly not want a specific word use a minus sign before the word (-) | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| keywords_from_file| kf | Denotes the file name from where you would want to import the keywords. | +| | | | +| | | Add one keyword per line. Blank/Empty lines are truncated automatically. | +| | | | +| | | Only file types '.txt' or '.csv' are allowed. | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| suffix_keywords | sk | Denotes additional words added after main keyword while making the search query. | +| | | | +| | | Useful when you have multiple suffix keywords for one keyword. | +| | | | +| | | The final search query would be: | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| limit | l | Denotes number of images that you want to download. | +| | | | +| | | As of now you can select anything between 1 and 100. If this value is not specified, it defaults to 100. | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| format | f | Denotes the format/extension of the image that you want to download. | +| | | | +| | | `Possible values: jpg, gif, png, bmp, svg, webp, ico` | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| color | c | Denotes the color filter that you want to apply to the images. | +| | | | +| | | `Possible values: red, orange, yellow, green, teal, blue, purple, pink, white, gray, black, brown` | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| color_type | ct | Denotes the color type you want to apply to the images. | +| | | | +| | | `Possible values: full-color, black-and-white, transparent` | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| usage_rights | r | Denotes the usage rights/licence under which the image is classified. | +| | | | +| | | `Possible values:` | +| | | | +| | | * `labled-for-reuse-with-modifications`, | +| | | * `labled-for-reuse`, | +| | | * `labled-for-noncommercial-reuse-with-modification`, | +| | | * `labled-for-nocommercial-reuse` | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| size | s | Denotes the relative size of the image to be downloaded. 
| +| | | | +| | | `Possible values: large, medium, icon` | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| aspect_ratio | a | Denotes the aspect ratio of images to download. | +| | | | +| | | `Possible values: tall, square, wide, panoramic` | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| type | t | Denotes the type of image to be downloaded. | +| | | | +| | | `Possible values: face, photo, clip-art, line-drawing, animated` | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| time | w | Denotes the time the image was uploaded/indexed. | +| | | | +| | | `Possible values: past-24-hours, past-7-days` | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| delay | d | Time to wait between downloading two images | +| | | | +| | | Time is to be specified in seconds. But you can have sub-second times by using decimal points. | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| url | u | Allows you search by image URL. It downloads images from the google images link provided | +| | | | +| | | If you are searching an image on the browser google images page, simply grab the browser URL and paste it in this parameter | +| | | It will download all the images seen on that page. | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| single_image | x | Allows you to download one image if the complete (absolute) URL of the image is provided | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| output_directory | o | Allows you specify the main directory name in which the images are downloaded. | +| | | | +| | | If not specified, it will default to 'downloads' directory. This directory is located in the path from where you run this code| ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| similar_images | si | Reverse Image Search. | +| | | | +| | | Searches and downloads images that are similar to the absolute image link/url you provide. | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| specific_site | ss | Allows you to download images with keywords only from a specific website/domain name you mention. | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| print_urls | p | Print the URLs of the images on the console. 
These image URLs can be used for debugging purposes | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| print_size | ps | Prints the size of the images on the console | +| | | | +| | | The size denoted the actual size of the image and not the size of the image on disk | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| metadata | m | Prints the metada of the image on the console. | +| | | | +| | | This includes image size, origin, image attributes, description, image URL, etc. | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| extract_metadata | e | This option allows you to save metadata of all the downloaded images in a text file. | +| | | | +| | | This file can be found in the ``logs/`` directory. The name of the file would be same as the keyword nam | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| socket_timeout | st | Allows you to specify the time to wait for socket connection. | +| | | | +| | | You could specify a higher timeout time for slow internet connection. The default value is 15 seconds. | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| thumbnail | th | Downloads image thumbnails corresponding to each image downloaded. | +| | | | +| | | Thumbnails are saved in their own sub-directories inside of the main directory. | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| language | la | Defines the language filter. The search results are automatically returned in that language | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| prefix | pr | A word that you would want to prefix in front of actual image name. | +| | | | +| | | This feature can be used to rename files for image identification purpose. | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| help | h | show the help message regarding the usage of the above arguments | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ **Note:** If ``single_image`` or ``url`` parameter is not present, then keywords is a mandatory parameter. No other parameters are mandatory. 
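For reference, the arguments documented above can be combined in a single invocation. A minimal sketch, run from the directory containing ``google_images_download.py``; the keyword, limit and filter values are illustrative only:

.. code-block:: bash

    # fetch up to 20 red, jpg-only images for the keyword "apple",
    # printing each image URL as it is downloaded (all values are examples)
    $ python google_images_download.py --keywords "apple" --limit 20 --color red --format jpg --print_urls
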
diff --git a/setup.py b/setup.py index 29f9b083..cfdd02b7 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from codecs import open from os import path -__version__ = '1.0.5' +__version__ = '1.0.6' here = path.abspath(path.dirname(__file__)) From e085147d8dd1d6c64cf2b16cbd0f50d2bcae8fa2 Mon Sep 17 00:00:00 2001 From: Vasa Date: Thu, 15 Mar 2018 11:25:31 -0700 Subject: [PATCH 14/83] keywords_from_file errors on windows #51 fixed error on if less than desired images are returned #52 more options on file size as seen on the google advance search option added time range feature fixed the bug on .jpeg images fixed the bug on downloading images from specific websites removed printing the entire file when using keywords from file --- .../google_images_download.py | 30 ++++++++++++++----- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py index 4a1ac3b1..b5dcece5 100644 --- a/google_images_download/google_images_download.py +++ b/google_images_download/google_images_download.py @@ -46,11 +46,12 @@ parser.add_argument('-r', '--usage_rights', help='usage rights', type=str, required=False, choices=['labled-for-reuse-with-modifications','labled-for-reuse','labled-for-noncommercial-reuse-with-modification','labled-for-nocommercial-reuse']) parser.add_argument('-s', '--size', help='image size', type=str, required=False, - choices=['large','medium','icon']) + choices=['large','medium','icon','>400*300','>640*480','>800*600','>1024*768','>2MP','>4MP','>6MP','>8MP','>10MP','>12MP','>15MP','>20MP','>40MP','>70MP']) parser.add_argument('-t', '--type', help='image type', type=str, required=False, choices=['face','photo','clip-art','line-drawing','animated']) parser.add_argument('-w', '--time', help='image age', type=str, required=False, choices=['past-24-hours','past-7-days']) +parser.add_argument('-wr', '--time_range', help='time range for the age of the image. should be in the format {"time_min":"MM/DD/YYYY","time_max":"MM/DD/YYYY"}', type=str, required=False) parser.add_argument('-a', '--aspect_ratio', help='comma separated additional words added to keywords', type=str, required=False, choices=['tall', 'square', 'wide', 'panoramic']) parser.add_argument('-si', '--similar_images', help='downloads images very similar to the image URL you provide', type=str, required=False) @@ -83,20 +84,24 @@ else: search_keyword.append(line.replace('\n', '').replace('\r', '')) # print(line) - print(search_keyword) + #print(search_keyword) elif '.txt' in file_name: for line in f: if line in ['\n', '\r\n']: pass else: # print line - search_keyword.append(line.replace('\n', '')) - print(search_keyword) + search_keyword.append(line.replace('\n', '').replace('\r', '')) + #print(search_keyword) else: print("Invalid file type: Valid file types are either .txt or .csv \n" "exiting...") sys.exit() +# both time and time range should not be allowed in the same query +if args.time and args.time_range: + parser.error('Either time or time range should be used in a query. 
Both cannot be used at the same time.') + #Additional words added to keywords if args.suffix_keywords: suffix_keywords = [" " + str(sk) for sk in args.suffix_keywords.split(',')] @@ -288,12 +293,19 @@ def build_url_parameters(): else: lang_url = '' + if args.time_range: + json_acceptable_string = args.time_range.replace("'", "\"") + d = json.loads(json_acceptable_string) + time_range = '&cdr:1,cd_min:' + d['time_min'] + ',cd_max:' + d['time_min'] + else: + time_range = '' + built_url = "&tbs=" counter = 0 params = {'color':[args.color,{'red':'ic:specific,isc:red', 'orange':'ic:specific,isc:orange', 'yellow':'ic:specific,isc:yellow', 'green':'ic:specific,isc:green', 'teal':'ic:specific,isc:teel', 'blue':'ic:specific,isc:blue', 'purple':'ic:specific,isc:purple', 'pink':'ic:specific,isc:pink', 'white':'ic:specific,isc:white', 'gray':'ic:specific,isc:gray', 'black':'ic:specific,isc:black', 'brown':'ic:specific,isc:brown'}], 'color_type':[args.color_type,{'full-color':'ic:color', 'black-and-white':'ic:gray','transparent':'ic:trans'}], 'usage_rights':[args.usage_rights,{'labled-for-reuse-with-modifications':'sur:fmc','labled-for-reuse':'sur:fc','labled-for-noncommercial-reuse-with-modification':'sur:fm','labled-for-nocommercial-reuse':'sur:f'}], - 'size':[args.size,{'large':'isz:l','medium':'isz:m','icon':'isz:i'}], + 'size':[args.size,{'large':'isz:l','medium':'isz:m','icon':'isz:i','>400*300':'isz:lt,islt:qsvga','>640*480':'isz:lt,islt:vga','>800*600':'isz:lt,islt:svga','>1024*768':'visz:lt,islt:xga','>2MP':'isz:lt,islt:2mp','>4MP':'isz:lt,islt:4mp','>6MP':'isz:lt,islt:6mp','>8MP':'isz:lt,islt:8mp','>10MP':'isz:lt,islt:10mp','>12MP':'isz:lt,islt:12mp','>15MP':'isz:lt,islt:15mp','>20MP':'isz:lt,islt:20mp','>40MP':'isz:lt,islt:40mp','>70MP':'isz:lt,islt:70mp'}], 'type':[args.type,{'face':'itp:face','photo':'itp:photo','clip-art':'itp:clip-art','line-drawing':'itp:lineart','animated':'itp:animated'}], 'time':[args.time,{'past-24-hours':'qdr:d','past-7-days':'qdr:w'}], 'aspect_ratio':[args.aspect_ratio,{'tall':'iar:t','square':'iar:s','wide':'iar:w','panoramic':'iar:xw'}], @@ -309,7 +321,7 @@ def build_url_parameters(): else: built_url = built_url + ',' + ext_param counter += 1 - built_url = lang_url+built_url + built_url = lang_url+built_url+time_range return built_url #building main search URL @@ -322,7 +334,7 @@ def build_search_url(search_term,params): url = 'https://www.google.com/search?q=' + keywordem + '&espv=2&biw=1366&bih=667&site=webhp&source=lnms&tbm=isch&sa=X&ei=XosDVaCXD8TasATItgE&ved=0CAcQ_AUoAg' elif args.specific_site: url = 'https://www.google.com/search?q=' + quote( - search_term) + 'site:' + args.specific_site + '&espv=2&biw=1366&bih=667&site=webhp&source=lnms&tbm=isch' + params + '&sa=X&ei=XosDVaCXD8TasATItgE&ved=0CAcQ_AUoAg' + search_term) + '&as_sitesearch=' + args.specific_site + '&espv=2&biw=1366&bih=667&site=webhp&source=lnms&tbm=isch' + params + '&sa=X&ei=XosDVaCXD8TasATItgE&ved=0CAcQ_AUoAg' else: url = 'https://www.google.com/search?q=' + quote( search_term) + '&espv=2&biw=1366&bih=667&site=webhp&source=lnms&tbm=isch' + params + '&sa=X&ei=XosDVaCXD8TasATItgE&ved=0CAcQ_AUoAg' @@ -461,6 +473,8 @@ def download_image(image_url,image_format,main_directory,dir_name,count): # remove everything after the image name if image_format == "": image_name = image_name + "." 
+ "jpg" + elif image_format == "jpeg": + image_name = image_name[:image_name.find(image_format) + 4] else: image_name = image_name[:image_name.find(image_format) + 3] @@ -507,7 +521,7 @@ def download_image(image_url,image_format,main_directory,dir_name,count): # Finding 'Next Image' from the given raw page def _get_next_item(s): - start_line = s.find('rg_di') + start_line = s.find('rg_meta notranslate') if start_line == -1: # If no links are found then give an error! end_quote = 0 link = "no_links" From 3684a637c88a59ebd859d66ec675de410332bd28 Mon Sep 17 00:00:00 2001 From: Vasa Date: Thu, 15 Mar 2018 14:04:16 -0700 Subject: [PATCH 15/83] added details of the new deatures corrected a typo as per #54 other minor changes --- README.rst | 13 +++++++++++-- setup.py | 2 +- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index befdac05..8a971c5a 100644 --- a/README.rst +++ b/README.rst @@ -116,7 +116,8 @@ Arguments +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | size | s | Denotes the relative size of the image to be downloaded. | | | | | -| | | `Possible values: large, medium, icon` | +| | | `Possible values: large, medium, icon, >400*300, >640*480, >800*600, >1024*768, >2MP, >4MP, >6MP, >8MP, >10MP, | +| | | >12MP, >15MP, >20MP, >40MP, >70MP` | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | aspect_ratio | a | Denotes the aspect ratio of images to download. | | | | | @@ -130,6 +131,10 @@ Arguments | | | | | | | `Possible values: past-24-hours, past-7-days` | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| time_range | wr | Denotes the time range for which you want to search the images | +| | | | +| | | The value of this parameter should be in the following format '{"time_min":"MM/DD/YYYY","time_max":"MM/DD/YYYY"}' | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | delay | d | Time to wait between downloading two images | | | | | | | | Time is to be specified in seconds. But you can have sub-second times by using decimal points. | @@ -174,6 +179,10 @@ Arguments | | | Thumbnails are saved in their own sub-directories inside of the main directory. | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | language | la | Defines the language filter. The search results are automatically returned in that language | +| | | | +| | | `Possible Values: Arabic, Chinese (Simplified), Chinese (Traditional), Czech, Danish, Dutch, English, Estonian. Finnish, | +| | | French, German, Greek, Hebrew, Hungarian, Icelandic, Italian, Japanese, Korean, Latvianm, Lithuanian, Norwegian, Portuguese, | +| | | Polish, Romanian, Russian, Spanish, Swedish, Turkish` | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | prefix | pr | A word that you would want to prefix in front of actual image name. 
| | | | | @@ -356,7 +365,7 @@ Disclaimer ========== This program lets you download tons of images from Google. -Please do not download any image without violating its copyright terms. +Please do not download or use any image that violates its copyright terms. Google Images is a search engine that merely indexes images and allows you to find them. It does NOT produce its own images and, as such, it doesn't own copyright on any of them. The original creators of the images own the copyrights. diff --git a/setup.py b/setup.py index cfdd02b7..3af0fc46 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from codecs import open from os import path -__version__ = '1.0.6' +__version__ = '1.0.7' here = path.abspath(path.dirname(__file__)) From 735d8f795cd3b5313b3a2a4be4d28307ca1956f7 Mon Sep 17 00:00:00 2001 From: Vasa Date: Sat, 17 Mar 2018 17:14:42 -0700 Subject: [PATCH 16/83] now thumbnail file name is same as original file name feature to support proxy settings --- .../google_images_download.py | 51 +++++++++---------- 1 file changed, 24 insertions(+), 27 deletions(-) diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py index b5dcece5..86b4184b 100644 --- a/google_images_download/google_images_download.py +++ b/google_images_download/google_images_download.py @@ -65,6 +65,7 @@ parser.add_argument('-la', '--language', default=False, help="Defines the language filter. The search results are authomatically returned in that language", type=str, required=False, choices=['Arabic','Chinese (Simplified)','Chinese (Traditional)','Czech','Danish','Dutch','English','Estonian','Finnish','French','German','Greek','Hebrew','Hungarian','Icelandic','Italian','Japanese','Korean','Latvian','Lithuanian','Norwegian','Portuguese','Polish','Romanian','Russian','Spanish','Swedish','Turkish']) parser.add_argument('-pr', '--prefix', default=False, help="A word that you would want to prefix in front of each image name", type=str, required=False) +parser.add_argument('-px', '--proxy', help='specify a proxy address and port', type=str, required=False) args = parser.parse_args() @@ -153,6 +154,10 @@ else: print_size = 'no' +if args.proxy: + os.environ["http_proxy"] = args.proxy + os.environ["https_proxy"] = args.proxy + #------ Initialization Complete ------# # Downloading entire Web Document (Raw Page Content) @@ -386,7 +391,7 @@ def create_directories(main_directory, dir_name): # Download Images -def download_image_thumbnail(image_url, image_format, main_directory, dir_name, count): +def download_image_thumbnail(image_url,main_directory,dir_name,return_image_name): if args.print_urls: print("Image URL: " + image_url) try: @@ -400,35 +405,19 @@ def download_image_thumbnail(image_url, image_format, main_directory, dir_name, timeout = 15 response = urlopen(req, None, timeout) - # keep everything after the last '/' - image_name = str(image_url[(image_url.rfind('tbn')) + 4:]) - image_name = image_name.lower() - # if no extension then add it - # remove everything after the image name - if image_format == "": - image_name = image_name + ".jpg" - else: - image_name = image_name + "." + image_format - - #prefix name in image - if args.prefix: - prefix = args.prefix + " " - else: - prefix = '' - - path = main_directory + "/" + dir_name + " - thumbnail" + "/" + prefix + " " + str(count) + ". 
" + image_name + path = main_directory + "/" + dir_name + " - thumbnail" + "/" + return_image_name output_file = open(path, 'wb') data = response.read() output_file.write(data) response.close() + download_status = 'success' + download_message = "Completed Image Thumbnail ====> " + return_image_name + # image size parameter if args.print_size: print("Image Size: " + str(file_size(path))) - download_status = 'success' - download_message = "Completed Image Thumbnail ====> " + prefix + str(count) + ". " + image_name - except UnicodeEncodeError as e: download_status = 'fail' download_message = "UnicodeEncodeError on an image...trying next one..." + " Error: " + str(e) @@ -490,33 +479,41 @@ def download_image(image_url,image_format,main_directory,dir_name,count): output_file.write(data) response.close() - #image size parameter - if args.print_size: - print("Image Size: " + str(file_size(path))) + #return image name back to calling method to use it for thumbnail downloads + return_image_name = prefix + str(count) + ". " + image_name download_status = 'success' download_message = "Completed Image ====> " + prefix + str(count) + ". " + image_name + # image size parameter + if args.print_size: + print("Image Size: " + str(file_size(path))) + except UnicodeEncodeError as e: download_status = 'fail' download_message = "UnicodeEncodeError on an image...trying next one..." + " Error: " + str(e) + return_image_name = '' except HTTPError as e: # If there is any HTTPError download_status = 'fail' download_message = "HTTPError on an image...trying next one..." + " Error: " + str(e) + return_image_name = '' except URLError as e: download_status = 'fail' download_message = "URLError on an image...trying next one..." + " Error: " + str(e) + return_image_name = '' except ssl.CertificateError as e: download_status = 'fail' download_message = "CertificateError on an image...trying next one..." + " Error: " + str(e) + return_image_name = '' except IOError as e: # If there is any IOError download_status = 'fail' download_message = "IOError on an image...trying next one..." 
+ " Error: " + str(e) - return download_status,download_message + return_image_name = '' + return download_status,download_message,return_image_name # Finding 'Next Image' from the given raw page @@ -562,13 +559,13 @@ def _get_all_items(page,main_directory,dir_name,limit): items.append(object) # Append all the links in the list named 'Links' #download the images - download_status,download_message = download_image(object['image_link'],object['image_format'],main_directory,dir_name,count) + download_status,download_message,return_image_name = download_image(object['image_link'],object['image_format'],main_directory,dir_name,count) print(download_message) if download_status == "success": # download image_thumbnails if args.thumbnail: - download_status, download_message_thumbnail = download_image_thumbnail(object['image_thumbnail_url'],object['image_format'],main_directory, dir_name, count) + download_status, download_message_thumbnail = download_image_thumbnail(object['image_thumbnail_url'],main_directory,dir_name,return_image_name) print(download_message_thumbnail) count += 1 From bff2bca58f81b739b1b98858c218488a3abb71b9 Mon Sep 17 00:00:00 2001 From: Vasa Date: Sat, 17 Mar 2018 17:25:06 -0700 Subject: [PATCH 17/83] doc update --- README.rst | 4 ++++ setup.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 8a971c5a..2867cf8c 100644 --- a/README.rst +++ b/README.rst @@ -150,6 +150,10 @@ Arguments | | | | | | | If not specified, it will default to 'downloads' directory. This directory is located in the path from where you run this code| +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| proxy | px | Allows you to specify proxy server setting for all your requests | +| | | | +| | | You can specify the proxy settings in 'IP:Port' format | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | similar_images | si | Reverse Image Search. | | | | | | | | Searches and downloads images that are similar to the absolute image link/url you provide. 
| diff --git a/setup.py b/setup.py index 3af0fc46..c137ad92 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from codecs import open from os import path -__version__ = '1.0.7' +__version__ = '1.0.8' here = path.abspath(path.dirname(__file__)) From fa3dc7000c30afd191a4f4cfdd4a5f2f8299f456 Mon Sep 17 00:00:00 2001 From: Vasa Date: Mon, 19 Mar 2018 10:02:52 -0700 Subject: [PATCH 18/83] ungate the 100 images limit allow users to download as many images as the google search returns bug fix on JSON errors on non-escaped quotes --- .../google_images_download.py | 54 +++++++++++++++---- 1 file changed, 45 insertions(+), 9 deletions(-) diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py index 86b4184b..c8cbd16c 100644 --- a/google_images_download/google_images_download.py +++ b/google_images_download/google_images_download.py @@ -112,8 +112,6 @@ # Setting limit on number of images to be downloaded if args.limit: limit = int(args.limit) - if int(args.limit) >= 100: - limit = 100 else: limit = 100 @@ -189,6 +187,34 @@ def download_page(url): except: return "Page Not found" +# Download Page for more than 100 images +def download_extended_page(url): + try: + from selenium import webdriver + scrolls = 5 + driver = webdriver.Firefox() + driver.get(url) + for scroll in range(scrolls): + for page_scroll in range(10): + driver.execute_script("window.scrollBy(0, 10000)") + time.sleep(0.5) + time.sleep(1) + try: + driver.find_element_by_xpath("//input[@value='Show more results']").click() + except: + print("End of page reached...") + break + version = (3, 0) + cur_version = sys.version_info + if cur_version >= version: # If the Current Version of Python is 3.0 or above + page = driver.page_source + else: #python 2 + page = driver.page_source.encode('utf-8') + driver.quit() + return page + except: + return "Page Not found" + #Correcting the escape characters for python2 def replace_with_byte(match): return chr(int(match.group(0)[1:], 8)) @@ -402,7 +428,7 @@ def download_image_thumbnail(image_url,main_directory,dir_name,return_image_name if args.socket_timeout: timeout = float(args.socket_timeout) else: - timeout = 15 + timeout = 10 response = urlopen(req, None, timeout) path = main_directory + "/" + dir_name + " - thumbnail" + "/" + return_image_name @@ -452,7 +478,7 @@ def download_image(image_url,image_format,main_directory,dir_name,count): if args.socket_timeout: timeout = float(args.socket_timeout) else: - timeout = 15 + timeout = 10 response = urlopen(req, None, timeout) # keep everything after the last '/' @@ -528,15 +554,20 @@ def _get_next_item(s): start_object = s.find('{', start_line + 1) end_object = s.find('', start_object + 1) object_raw = str(s[start_object:end_object]) - #####print(object_raw) #remove escape characters based on python version version = (3, 0) cur_version = sys.version_info if cur_version >= version: #python3 - object_decode = bytes(object_raw, "utf-8").decode("unicode_escape") - final_object = json.loads(object_decode) + try: + object_decode = bytes(object_raw, "utf-8").decode("unicode_escape") + final_object = json.loads(object_decode) + except: + final_object = "" else: #python2 - final_object = (json.loads(repair(object_raw))) + try: + final_object = (json.loads(repair(object_raw))) + except: + final_object = "" return final_object, end_object @@ -550,6 +581,8 @@ def _get_all_items(page,main_directory,dir_name,limit): object, end_content = _get_next_item(page) if object == "no_links": break + elif object == "": 
+ page = page[end_content:] else: #format the item for readability object = format_object(object) @@ -606,7 +639,10 @@ def bulk_download(search_keyword,suffix_keywords,limit,main_directory): url = build_search_url(search_term,params) #building main search url - raw_html = (download_page(url)) #download page + if limit < 101: + raw_html = download_page(url) # download page + else: + raw_html = download_extended_page(url) print("Starting Download...") items,errorCount = _get_all_items(raw_html,main_directory,dir_name,limit) #get all image items and download images From bb48f5844b4f1fbae9351d521779223699c8d168 Mon Sep 17 00:00:00 2001 From: Vasa Date: Mon, 19 Mar 2018 10:36:30 -0700 Subject: [PATCH 19/83] doc update --- README.rst | 32 +++++++++++++++++++++++++++----- requirements.txt | 1 + setup.py | 2 +- 3 files changed, 29 insertions(+), 6 deletions(-) diff --git a/README.rst b/README.rst index 2867cf8c..7b5d7c09 100644 --- a/README.rst +++ b/README.rst @@ -11,9 +11,12 @@ Summary ======= This is a command line python program to search keywords/key-phrases on Google Images -and then also optionally download one or more images to your computer. -This is a small program which is ready-to-run, but still under development. -Many more features will be added to it going forward. +and optionally download images to your computer. + +This is a small and ready-to-run program. No dependencies are required to be installed +if you would only want to download up to 100 images per keyword. If you would want more than 100 +images per keyword, then you would need to install ``Selenium`` library along with ``geckodriver``. +Detailed instructions in the troubleshooting section. Compatability @@ -91,7 +94,9 @@ Arguments +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | limit | l | Denotes number of images that you want to download. | | | | | -| | | As of now you can select anything between 1 and 100. If this value is not specified, it defaults to 100. | +| | | You can specify any integer value here. It will try and get all the images that it finds in the google image search page. | +| | | | +| | | If this value is not specified, it defaults to 100. | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | format | f | Denotes the format/extension of the image that you want to download. | | | | | @@ -176,7 +181,7 @@ Arguments +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | socket_timeout | st | Allows you to specify the time to wait for socket connection. | | | | | -| | | You could specify a higher timeout time for slow internet connection. The default value is 15 seconds. | +| | | You could specify a higher timeout time for slow internet connection. The default value is 10 seconds. | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | thumbnail | th | Downloads image thumbnails corresponding to each image downloaded. 
| | | | | @@ -347,6 +352,23 @@ On MAC and Linux, when you get permission denied when installing the library usi You can also run pip install as a superuser with ``sudo pip install google_images_download`` but it is not generally a good idea because it can cause issues with your system-level packages. + +**## Installing the geckodriver (with Selenium)** + +If you would want to download more than 100 images per keyword, then you will need to install 'selenium' along with geckodriver. + +If you have pip installed the library or run the setup.py file, Selenium would have automatically installed on your machine. For geckidriver: + +`Download the correct geckodriver `__ based on your operating system. Below example shows how to install it for Linux operating system. + +.. code-block:: bash + + $ wget https://github.com/mozilla/geckodriver/releases/download/v0.20.0/geckodriver-v0.20.0-linux64.tar.gz + $ tar -xvzf geckodriver* + $ chmod +x geckodriver + $ export PATH=$PATH:/path-to-extracted-file/geckodriver + + Structure ========= diff --git a/requirements.txt b/requirements.txt index e69de29b..954f0db0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -0,0 +1 @@ +selenium \ No newline at end of file diff --git a/setup.py b/setup.py index c137ad92..904ceaec 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from codecs import open from os import path -__version__ = '1.0.8' +__version__ = '1.1.0' here = path.abspath(path.dirname(__file__)) From f3783c11ca0f3267d168bb552d0515016dfbb527 Mon Sep 17 00:00:00 2001 From: Vasa Date: Mon, 19 Mar 2018 11:06:05 -0700 Subject: [PATCH 20/83] doc update --- README.rst | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 7b5d7c09..ee63c510 100644 --- a/README.rst +++ b/README.rst @@ -357,7 +357,7 @@ You can also run pip install as a superuser with ``sudo pip install google_image If you would want to download more than 100 images per keyword, then you will need to install 'selenium' along with geckodriver. -If you have pip installed the library or run the setup.py file, Selenium would have automatically installed on your machine. For geckidriver: +If you have pip installed the library or run the setup.py file, Selenium would have automatically installed on your machine. You will also need Firefox browser on your machine. For geckidriver: `Download the correct geckodriver `__ based on your operating system. Below example shows how to install it for Linux operating system. @@ -368,6 +368,9 @@ If you have pip installed the library or run the setup.py file, Selenium would h $ chmod +x geckodriver $ export PATH=$PATH:/path-to-extracted-file/geckodriver +For **Windows** if for some reason the geckodriver gives you trouble, download it under the current directory and run the command. 
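With Firefox, Selenium and geckodriver set up as described above, a limit greater than 100 switches the script to the Selenium-based extended page download. A minimal sketch; the keyword and limit are illustrative only:

.. code-block:: bash

    # limits above 100 require selenium + geckodriver (keyword and limit are examples)
    $ python google_images_download.py --keywords "sunset" --limit 250
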
+ +On **Linux** if you get errors related to ``libmozgtk.so``, refer to this `Link `__ Structure ========= From 2922c678803d50c9df26d07e8e2a7e30b12759ed Mon Sep 17 00:00:00 2001 From: Vasa Date: Mon, 26 Mar 2018 14:14:33 -0700 Subject: [PATCH 21/83] config file as input in addition to CLI input removed dependency on arguments in the main code other minor updates --- .../google_images_download.py | 378 +++++++++--------- 1 file changed, 200 insertions(+), 178 deletions(-) diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py index c8cbd16c..a57a94fc 100644 --- a/google_images_download/google_images_download.py +++ b/google_images_download/google_images_download.py @@ -27,136 +27,69 @@ import re import codecs -# Taking command line arguments from users -parser = argparse.ArgumentParser() -parser.add_argument('-k', '--keywords', help='delimited list input', type=str, required=False) -parser.add_argument('-kf', '--keywords_from_file', help='extract list of keywords from a text file', type=str, required=False) -parser.add_argument('-sk', '--suffix_keywords', help='comma separated additional words added to main keyword', type=str, required=False) -parser.add_argument('-l', '--limit', help='delimited list input', type=str, required=False) -parser.add_argument('-f', '--format', help='download images with specific format', type=str, required=False, - choices=['jpg', 'gif', 'png', 'bmp', 'svg', 'webp', 'ico']) -parser.add_argument('-u', '--url', help='search with google image URL', type=str, required=False) -parser.add_argument('-x', '--single_image', help='downloading a single image from URL', type=str, required=False) -parser.add_argument('-o', '--output_directory', help='download images in a specific directory', type=str, required=False) -parser.add_argument('-d', '--delay', help='delay in seconds to wait between downloading two images', type=str, required=False) -parser.add_argument('-c', '--color', help='filter on color', type=str, required=False, - choices=['red', 'orange', 'yellow', 'green', 'teal', 'blue', 'purple', 'pink', 'white', 'gray', 'black', 'brown']) -parser.add_argument('-ct', '--color_type', help='filter on color', type=str, required=False, - choices=['full-color', 'black-and-white', 'transparent']) -parser.add_argument('-r', '--usage_rights', help='usage rights', type=str, required=False, - choices=['labled-for-reuse-with-modifications','labled-for-reuse','labled-for-noncommercial-reuse-with-modification','labled-for-nocommercial-reuse']) -parser.add_argument('-s', '--size', help='image size', type=str, required=False, - choices=['large','medium','icon','>400*300','>640*480','>800*600','>1024*768','>2MP','>4MP','>6MP','>8MP','>10MP','>12MP','>15MP','>20MP','>40MP','>70MP']) -parser.add_argument('-t', '--type', help='image type', type=str, required=False, - choices=['face','photo','clip-art','line-drawing','animated']) -parser.add_argument('-w', '--time', help='image age', type=str, required=False, - choices=['past-24-hours','past-7-days']) -parser.add_argument('-wr', '--time_range', help='time range for the age of the image. 
should be in the format {"time_min":"MM/DD/YYYY","time_max":"MM/DD/YYYY"}', type=str, required=False) -parser.add_argument('-a', '--aspect_ratio', help='comma separated additional words added to keywords', type=str, required=False, - choices=['tall', 'square', 'wide', 'panoramic']) -parser.add_argument('-si', '--similar_images', help='downloads images very similar to the image URL you provide', type=str, required=False) -parser.add_argument('-ss', '--specific_site', help='downloads images that are indexed from a specific website', type=str, required=False) -parser.add_argument('-p', '--print_urls', default=False, help="Print the URLs of the images", action="store_true") -parser.add_argument('-ps', '--print_size', default=False, help="Print the size of the images on disk", action="store_true") -parser.add_argument('-m', '--metadata', default=False, help="Print the metadata of the image", action="store_true") -parser.add_argument('-e', '--extract_metadata', default=False, help="Dumps all the logs into a text file", action="store_true") -parser.add_argument('-st', '--socket_timeout', default=False, help="Connection timeout waiting for the image to download", type=float) -parser.add_argument('-th', '--thumbnail', default=False, help="Downloads image thumbnail along with the actual image", action="store_true") -parser.add_argument('-la', '--language', default=False, help="Defines the language filter. The search results are authomatically returned in that language", type=str, required=False, - choices=['Arabic','Chinese (Simplified)','Chinese (Traditional)','Czech','Danish','Dutch','English','Estonian','Finnish','French','German','Greek','Hebrew','Hungarian','Icelandic','Italian','Japanese','Korean','Latvian','Lithuanian','Norwegian','Portuguese','Polish','Romanian','Russian','Spanish','Swedish','Turkish']) -parser.add_argument('-pr', '--prefix', default=False, help="A word that you would want to prefix in front of each image name", type=str, required=False) -parser.add_argument('-px', '--proxy', help='specify a proxy address and port', type=str, required=False) - -args = parser.parse_args() - -#Initialization and Validation of user arguments -if args.keywords: - search_keyword = [str(item) for item in args.keywords.split(',')] - -#Initialization and Validation of user arguments -if args.keywords_from_file: - search_keyword = [] - file_name = args.keywords_from_file - with codecs.open(file_name, 'r', encoding='utf-8-sig') as f: - if '.csv' in file_name: - for line in f: - if line in ['\n', '\r\n']: - pass - else: - search_keyword.append(line.replace('\n', '').replace('\r', '')) - # print(line) - #print(search_keyword) - elif '.txt' in file_name: - for line in f: - if line in ['\n', '\r\n']: - pass - else: - # print line - search_keyword.append(line.replace('\n', '').replace('\r', '')) - #print(search_keyword) - else: - print("Invalid file type: Valid file types are either .txt or .csv \n" - "exiting...") - sys.exit() - -# both time and time range should not be allowed in the same query -if args.time and args.time_range: - parser.error('Either time or time range should be used in a query. 
Both cannot be used at the same time.') - -#Additional words added to keywords -if args.suffix_keywords: - suffix_keywords = [" " + str(sk) for sk in args.suffix_keywords.split(',')] -else: - suffix_keywords = [] - -# Setting limit on number of images to be downloaded -if args.limit: - limit = int(args.limit) -else: - limit = 100 - -if args.url: - current_time = str(datetime.datetime.now()).split('.')[0] - search_keyword = [current_time.replace(":", "_")] - -if args.similar_images: - current_time = str(datetime.datetime.now()).split('.')[0] - search_keyword = [current_time.replace(":", "_")] - -# If single_image or url argument not present then keywords is mandatory argument -if args.single_image is None and args.url is None and args.similar_images is None and args.keywords is None and args.keywords_from_file is None: - parser.error('Keywords is a required argument!') - -# If this argument is present, set the custom output directory -if args.output_directory: - main_directory = args.output_directory +config = argparse.ArgumentParser() +config.add_argument('-cf', '--config_file', help='config file name', default='', type=str, required=False) +config_file_check = config.parse_known_args() +object_check = vars(config_file_check[0]) + +if object_check['config_file'] != '': + args_list = ["keywords","keywords_from_file","suffix_keywords","limit","format","url","single_image","output_directory","delay","color","color_type","usage_rights","size","type","time","time_range","aspect_ratio","similar_images","specific_site","print_urls","print_size","metadata","extract_metadata","socket_timeout","thumbnail","language","prefix","proxy"] + records = [] + json_file = json.load(open(config_file_check[0].config_file)) + for record in range(0,len(json_file['Records'])): + arguments = {} + for i in args_list: + arguments[i] = None + for key, value in json_file['Records'][record].items(): + arguments[key] = value + records.append(arguments) + records_count = len(records) else: - main_directory = "downloads" - -# Set the delay parameter if this argument is present -if args.delay: - try: - delay_time = int(args.delay) - except ValueError: - parser.error('Delay parameter should be an integer!') -else: - delay_time = 0 - -if args.print_urls: - print_url = 'yes' -else: - print_url = 'no' - -if args.print_size: - print_size = 'yes' -else: - print_size = 'no' - -if args.proxy: - os.environ["http_proxy"] = args.proxy - os.environ["https_proxy"] = args.proxy - -#------ Initialization Complete ------# + # Taking command line arguments from users + parser = argparse.ArgumentParser() + parser.add_argument('-k', '--keywords', help='delimited list input', type=str, required=False) + parser.add_argument('-kf', '--keywords_from_file', help='extract list of keywords from a text file', type=str, required=False) + parser.add_argument('-sk', '--suffix_keywords', help='comma separated additional words added to main keyword', type=str, required=False) + parser.add_argument('-l', '--limit', help='delimited list input', type=str, required=False) + parser.add_argument('-f', '--format', help='download images with specific format', type=str, required=False, + choices=['jpg', 'gif', 'png', 'bmp', 'svg', 'webp', 'ico']) + parser.add_argument('-u', '--url', help='search with google image URL', type=str, required=False) + parser.add_argument('-x', '--single_image', help='downloading a single image from URL', type=str, required=False) + parser.add_argument('-o', '--output_directory', help='download images in a specific directory', type=str, 
required=False) + parser.add_argument('-d', '--delay', help='delay in seconds to wait between downloading two images', type=str, required=False) + parser.add_argument('-c', '--color', help='filter on color', type=str, required=False, + choices=['red', 'orange', 'yellow', 'green', 'teal', 'blue', 'purple', 'pink', 'white', 'gray', 'black', 'brown']) + parser.add_argument('-ct', '--color_type', help='filter on color', type=str, required=False, + choices=['full-color', 'black-and-white', 'transparent']) + parser.add_argument('-r', '--usage_rights', help='usage rights', type=str, required=False, + choices=['labled-for-reuse-with-modifications','labled-for-reuse','labled-for-noncommercial-reuse-with-modification','labled-for-nocommercial-reuse']) + parser.add_argument('-s', '--size', help='image size', type=str, required=False, + choices=['large','medium','icon','>400*300','>640*480','>800*600','>1024*768','>2MP','>4MP','>6MP','>8MP','>10MP','>12MP','>15MP','>20MP','>40MP','>70MP']) + parser.add_argument('-t', '--type', help='image type', type=str, required=False, + choices=['face','photo','clip-art','line-drawing','animated']) + parser.add_argument('-w', '--time', help='image age', type=str, required=False, + choices=['past-24-hours','past-7-days']) + parser.add_argument('-wr', '--time_range', help='time range for the age of the image. should be in the format {"time_min":"MM/DD/YYYY","time_max":"MM/DD/YYYY"}', type=str, required=False) + parser.add_argument('-a', '--aspect_ratio', help='comma separated additional words added to keywords', type=str, required=False, + choices=['tall', 'square', 'wide', 'panoramic']) + parser.add_argument('-si', '--similar_images', help='downloads images very similar to the image URL you provide', type=str, required=False) + parser.add_argument('-ss', '--specific_site', help='downloads images that are indexed from a specific website', type=str, required=False) + parser.add_argument('-p', '--print_urls', default=False, help="Print the URLs of the images", action="store_true") + parser.add_argument('-ps', '--print_size', default=False, help="Print the size of the images on disk", action="store_true") + parser.add_argument('-m', '--metadata', default=False, help="Print the metadata of the image", action="store_true") + parser.add_argument('-e', '--extract_metadata', default=False, help="Dumps all the logs into a text file", action="store_true") + parser.add_argument('-st', '--socket_timeout', default=False, help="Connection timeout waiting for the image to download", type=float) + parser.add_argument('-th', '--thumbnail', default=False, help="Downloads image thumbnail along with the actual image", action="store_true") + parser.add_argument('-la', '--language', default=False, help="Defines the language filter. 
The search results are authomatically returned in that language", type=str, required=False, + choices=['Arabic','Chinese (Simplified)','Chinese (Traditional)','Czech','Danish','Dutch','English','Estonian','Finnish','French','German','Greek','Hebrew','Hungarian','Icelandic','Italian','Japanese','Korean','Latvian','Lithuanian','Norwegian','Portuguese','Polish','Romanian','Russian','Spanish','Swedish','Turkish']) + parser.add_argument('-pr', '--prefix', default=False, help="A word that you would want to prefix in front of each image name", type=str, required=False) + parser.add_argument('-px', '--proxy', help='specify a proxy address and port', type=str, required=False) + + args = parser.parse_args() + arguments = vars(args) + records = [] + records.append(arguments) + records_count = len(records) # Downloading entire Web Document (Raw Page Content) def download_page(url): @@ -239,7 +172,7 @@ def format_object(object): #function to download single image def single_image(): - url = args.single_image + url = arguments['single_image'] try: os.makedirs(main_directory) except OSError as e: @@ -269,7 +202,7 @@ def similar_images(): cur_version = sys.version_info if cur_version >= version: # If the Current Version of Python is 3.0 or above try: - searchUrl = 'https://www.google.com/searchbyimage?site=search&sa=X&image_url=' + args.similar_images + searchUrl = 'https://www.google.com/searchbyimage?site=search&sa=X&image_url=' + arguments['similar_images'] headers = {} headers['User-Agent'] = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36" @@ -292,7 +225,7 @@ def similar_images(): return "Cloud not connect to Google Images endpoint" else: # If the Current Version of Python is 2.x try: - searchUrl = 'https://www.google.com/searchbyimage?site=search&sa=X&image_url=' + args.similar_images + searchUrl = 'https://www.google.com/searchbyimage?site=search&sa=X&image_url=' + arguments['similar_images'] headers = {} headers['User-Agent'] = "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17" @@ -317,15 +250,15 @@ def similar_images(): #Building URL parameters def build_url_parameters(): - if args.language: + if arguments['language']: lang = "&lr=" lang_param = {"Arabic":"lang_ar","Chinese (Simplified)":"lang_zh-CN","Chinese (Traditional)":"lang_zh-TW","Czech":"lang_cs","Danish":"lang_da","Dutch":"lang_nl","English":"lang_en","Estonian":"lang_et","Finnish":"lang_fi","French":"lang_fr","German":"lang_de","Greek":"lang_el","Hebrew":"lang_iw ","Hungarian":"lang_hu","Icelandic":"lang_is","Italian":"lang_it","Japanese":"lang_ja","Korean":"lang_ko","Latvian":"lang_lv","Lithuanian":"lang_lt","Norwegian":"lang_no","Portuguese":"lang_pt","Polish":"lang_pl","Romanian":"lang_ro","Russian":"lang_ru","Spanish":"lang_es","Swedish":"lang_sv","Turkish":"lang_tr"} - lang_url = lang+lang_param[args.language] + lang_url = lang+lang_param[arguments['language']] else: lang_url = '' - if args.time_range: - json_acceptable_string = args.time_range.replace("'", "\"") + if arguments['time_range']: + json_acceptable_string = arguments['time_range'].replace("'", "\"") d = json.loads(json_acceptable_string) time_range = '&cdr:1,cd_min:' + d['time_min'] + ',cd_max:' + d['time_min'] else: @@ -333,14 +266,14 @@ def build_url_parameters(): built_url = "&tbs=" counter = 0 - params = {'color':[args.color,{'red':'ic:specific,isc:red', 'orange':'ic:specific,isc:orange', 'yellow':'ic:specific,isc:yellow', 'green':'ic:specific,isc:green', 
'teal':'ic:specific,isc:teel', 'blue':'ic:specific,isc:blue', 'purple':'ic:specific,isc:purple', 'pink':'ic:specific,isc:pink', 'white':'ic:specific,isc:white', 'gray':'ic:specific,isc:gray', 'black':'ic:specific,isc:black', 'brown':'ic:specific,isc:brown'}], - 'color_type':[args.color_type,{'full-color':'ic:color', 'black-and-white':'ic:gray','transparent':'ic:trans'}], - 'usage_rights':[args.usage_rights,{'labled-for-reuse-with-modifications':'sur:fmc','labled-for-reuse':'sur:fc','labled-for-noncommercial-reuse-with-modification':'sur:fm','labled-for-nocommercial-reuse':'sur:f'}], - 'size':[args.size,{'large':'isz:l','medium':'isz:m','icon':'isz:i','>400*300':'isz:lt,islt:qsvga','>640*480':'isz:lt,islt:vga','>800*600':'isz:lt,islt:svga','>1024*768':'visz:lt,islt:xga','>2MP':'isz:lt,islt:2mp','>4MP':'isz:lt,islt:4mp','>6MP':'isz:lt,islt:6mp','>8MP':'isz:lt,islt:8mp','>10MP':'isz:lt,islt:10mp','>12MP':'isz:lt,islt:12mp','>15MP':'isz:lt,islt:15mp','>20MP':'isz:lt,islt:20mp','>40MP':'isz:lt,islt:40mp','>70MP':'isz:lt,islt:70mp'}], - 'type':[args.type,{'face':'itp:face','photo':'itp:photo','clip-art':'itp:clip-art','line-drawing':'itp:lineart','animated':'itp:animated'}], - 'time':[args.time,{'past-24-hours':'qdr:d','past-7-days':'qdr:w'}], - 'aspect_ratio':[args.aspect_ratio,{'tall':'iar:t','square':'iar:s','wide':'iar:w','panoramic':'iar:xw'}], - 'format':[args.format,{'jpg':'ift:jpg','gif':'ift:gif','png':'ift:png','bmp':'ift:bmp','svg':'ift:svg','webp':'webp','ico':'ift:ico'}]} + params = {'color':[arguments['color'],{'red':'ic:specific,isc:red', 'orange':'ic:specific,isc:orange', 'yellow':'ic:specific,isc:yellow', 'green':'ic:specific,isc:green', 'teal':'ic:specific,isc:teel', 'blue':'ic:specific,isc:blue', 'purple':'ic:specific,isc:purple', 'pink':'ic:specific,isc:pink', 'white':'ic:specific,isc:white', 'gray':'ic:specific,isc:gray', 'black':'ic:specific,isc:black', 'brown':'ic:specific,isc:brown'}], + 'color_type':[arguments['color_type'],{'full-color':'ic:color', 'black-and-white':'ic:gray','transparent':'ic:trans'}], + 'usage_rights':[arguments['usage_rights'],{'labled-for-reuse-with-modifications':'sur:fmc','labled-for-reuse':'sur:fc','labled-for-noncommercial-reuse-with-modification':'sur:fm','labled-for-nocommercial-reuse':'sur:f'}], + 'size':[arguments['size'],{'large':'isz:l','medium':'isz:m','icon':'isz:i','>400*300':'isz:lt,islt:qsvga','>640*480':'isz:lt,islt:vga','>800*600':'isz:lt,islt:svga','>1024*768':'visz:lt,islt:xga','>2MP':'isz:lt,islt:2mp','>4MP':'isz:lt,islt:4mp','>6MP':'isz:lt,islt:6mp','>8MP':'isz:lt,islt:8mp','>10MP':'isz:lt,islt:10mp','>12MP':'isz:lt,islt:12mp','>15MP':'isz:lt,islt:15mp','>20MP':'isz:lt,islt:20mp','>40MP':'isz:lt,islt:40mp','>70MP':'isz:lt,islt:70mp'}], + 'type':[arguments['type'],{'face':'itp:face','photo':'itp:photo','clip-art':'itp:clip-art','line-drawing':'itp:lineart','animated':'itp:animated'}], + 'time':[arguments['time'],{'past-24-hours':'qdr:d','past-7-days':'qdr:w'}], + 'aspect_ratio':[arguments['aspect_ratio'],{'tall':'iar:t','square':'iar:s','wide':'iar:w','panoramic':'iar:xw'}], + 'format':[arguments['format'],{'jpg':'ift:jpg','gif':'ift:gif','png':'ift:png','bmp':'ift:bmp','svg':'ift:svg','webp':'webp','ico':'ift:ico'}]} for key, value in params.items(): if value[0] is not None: ext_param = value[1][value[0]] @@ -358,14 +291,14 @@ def build_url_parameters(): #building main search URL def build_search_url(search_term,params): # check the args and choose the URL - if args.url: - url = args.url - elif args.similar_images: + if 
arguments['url']: + url = arguments['url'] + elif arguments['similar_images']: keywordem = similar_images() url = 'https://www.google.com/search?q=' + keywordem + '&espv=2&biw=1366&bih=667&site=webhp&source=lnms&tbm=isch&sa=X&ei=XosDVaCXD8TasATItgE&ved=0CAcQ_AUoAg' - elif args.specific_site: + elif arguments['specific_site']: url = 'https://www.google.com/search?q=' + quote( - search_term) + '&as_sitesearch=' + args.specific_site + '&espv=2&biw=1366&bih=667&site=webhp&source=lnms&tbm=isch' + params + '&sa=X&ei=XosDVaCXD8TasATItgE&ved=0CAcQ_AUoAg' + search_term) + '&as_sitesearch=' + arguments['specific_site'] + '&espv=2&biw=1366&bih=667&site=webhp&source=lnms&tbm=isch' + params + '&sa=X&ei=XosDVaCXD8TasATItgE&ved=0CAcQ_AUoAg' else: url = 'https://www.google.com/search?q=' + quote( search_term) + '&espv=2&biw=1366&bih=667&site=webhp&source=lnms&tbm=isch' + params + '&sa=X&ei=XosDVaCXD8TasATItgE&ved=0CAcQ_AUoAg' @@ -395,7 +328,7 @@ def create_directories(main_directory, dir_name): sub_directory = os.path.join(main_directory, path) if not os.path.exists(sub_directory): os.makedirs(sub_directory) - if args.thumbnail: + if arguments['thumbnail']: sub_directory_thumbnail = os.path.join(main_directory, dir_name_thumbnail) if not os.path.exists(sub_directory_thumbnail): os.makedirs(sub_directory_thumbnail) @@ -404,7 +337,7 @@ def create_directories(main_directory, dir_name): sub_directory = os.path.join(main_directory, path) if not os.path.exists(sub_directory): os.makedirs(sub_directory) - if args.thumbnail: + if arguments['thumbnail']: sub_directory_thumbnail = os.path.join(main_directory, dir_name_thumbnail) if not os.path.exists(sub_directory_thumbnail): os.makedirs(sub_directory_thumbnail) @@ -418,15 +351,15 @@ def create_directories(main_directory, dir_name): # Download Images def download_image_thumbnail(image_url,main_directory,dir_name,return_image_name): - if args.print_urls: + if arguments['print_urls']: print("Image URL: " + image_url) try: req = Request(image_url, headers={ "User-Agent": "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17"}) try: # timeout time to download an image - if args.socket_timeout: - timeout = float(args.socket_timeout) + if arguments['socket_timeout']: + timeout = float(arguments['socket_timeout']) else: timeout = 10 response = urlopen(req, None, timeout) @@ -441,7 +374,7 @@ def download_image_thumbnail(image_url,main_directory,dir_name,return_image_name download_message = "Completed Image Thumbnail ====> " + return_image_name # image size parameter - if args.print_size: + if arguments['print_size']: print("Image Size: " + str(file_size(path))) except UnicodeEncodeError as e: @@ -468,15 +401,15 @@ def download_image_thumbnail(image_url,main_directory,dir_name,return_image_name # Download Images def download_image(image_url,image_format,main_directory,dir_name,count): - if args.print_urls: + if arguments['print_urls']: print("Image URL: " + image_url) try: req = Request(image_url, headers={ "User-Agent": "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17"}) try: # timeout time to download an image - if args.socket_timeout: - timeout = float(args.socket_timeout) + if arguments['socket_timeout']: + timeout = float(arguments['socket_timeout']) else: timeout = 10 response = urlopen(req, None, timeout) @@ -494,8 +427,8 @@ def download_image(image_url,image_format,main_directory,dir_name,count): image_name = image_name[:image_name.find(image_format) + 3] # 
prefix name in image - if args.prefix: - prefix = args.prefix + " " + if arguments['prefix']: + prefix = arguments['prefix'] + " " else: prefix = '' @@ -512,7 +445,7 @@ def download_image(image_url,image_format,main_directory,dir_name,count): download_message = "Completed Image ====> " + prefix + str(count) + ". " + image_name # image size parameter - if args.print_size: + if arguments['print_size']: print("Image Size: " + str(file_size(path))) except UnicodeEncodeError as e: @@ -586,7 +519,7 @@ def _get_all_items(page,main_directory,dir_name,limit): else: #format the item for readability object = format_object(object) - if args.metadata: + if arguments['metadata']: print("\nImage Metadata" + str(object)) items.append(object) # Append all the links in the list named 'Links' @@ -597,7 +530,7 @@ def _get_all_items(page,main_directory,dir_name,limit): if download_status == "success": # download image_thumbnails - if args.thumbnail: + if arguments['thumbnail']: download_status, download_message_thumbnail = download_image_thumbnail(object['image_thumbnail_url'],main_directory,dir_name,return_image_name) print(download_message_thumbnail) @@ -606,8 +539,8 @@ def _get_all_items(page,main_directory,dir_name,limit): errorCount += 1 #delay param - if args.delay: - time.sleep(int(args.delay)) + if arguments['delay']: + time.sleep(int(arguments['delay'])) page = page[end_content:] i += 1 @@ -631,7 +564,7 @@ def bulk_download(search_keyword,suffix_keywords,limit,main_directory): print(iteration) print("Evaluating...") search_term = search_keyword[i] + sky - dir_name = search_term + ('-' + args.color if args.color else '') #sub-directory + dir_name = search_term + ('-' + arguments['color'] if arguments['color'] else '') #sub-directory create_directories(main_directory,dir_name) #create directories in OS @@ -648,7 +581,7 @@ def bulk_download(search_keyword,suffix_keywords,limit,main_directory): items,errorCount = _get_all_items(raw_html,main_directory,dir_name,limit) #get all image items and download images #dumps into a text file - if args.extract_metadata: + if arguments['extract_metadata']: try: if not os.path.exists("logs"): os.makedirs("logs") @@ -662,17 +595,106 @@ def bulk_download(search_keyword,suffix_keywords,limit,main_directory): return errorCount #------------- Main Program -------------# -if args.single_image: #Download Single Image using a URL - single_image() -else: # or download multiple images based on keywords/keyphrase search - t0 = time.time() # start the timer - errorCount = bulk_download(search_keyword,suffix_keywords,limit,main_directory) - - print("\nEverything downloaded!") - print("Total Errors: " + str(errorCount) + "\n") - t1 = time.time() # stop the timer - total_time = t1 - t0 # Calculating the total time required to crawl, find and download all the links of 60,000 images - print("Total time taken: " + str(total_time) + " Seconds") +for arguments in records: + #Initialization and Validation of user arguments + if arguments['keywords']: + search_keyword = [str(item) for item in arguments['keywords'].split(',')] + + #Initialization and Validation of user arguments + if arguments['keywords_from_file']: + search_keyword = [] + file_name = arguments['keywords_from_file'] + with codecs.open(file_name, 'r', encoding='utf-8-sig') as f: + if '.csv' in file_name: + for line in f: + if line in ['\n', '\r\n']: + pass + else: + search_keyword.append(line.replace('\n', '').replace('\r', '')) + # print(line) + #print(search_keyword) + elif '.txt' in file_name: + for line in f: + if line in 
['\n', '\r\n']: + pass + else: + # print line + search_keyword.append(line.replace('\n', '').replace('\r', '')) + #print(search_keyword) + else: + print("Invalid file type: Valid file types are either .txt or .csv \n" + "exiting...") + sys.exit() + + # both time and time range should not be allowed in the same query + if arguments['time'] and arguments['time_range']: + parser.error('Either time or time range should be used in a query. Both cannot be used at the same time.') + + #Additional words added to keywords + if arguments['suffix_keywords']: + suffix_keywords = [" " + str(sk) for sk in arguments['suffix_keywords'].split(',')] + else: + suffix_keywords = [] + + # Setting limit on number of images to be downloaded + if arguments['limit']: + limit = int(arguments['limit']) + else: + limit = 100 + + if arguments['url']: + current_time = str(datetime.datetime.now()).split('.')[0] + search_keyword = [current_time.replace(":", "_")] + + if arguments['similar_images']: + current_time = str(datetime.datetime.now()).split('.')[0] + search_keyword = [current_time.replace(":", "_")] + + # If single_image or url argument not present then keywords is mandatory argument + if arguments['single_image'] is None and arguments['url'] is None and arguments['similar_images'] is None and arguments['keywords'] is None and arguments['keywords_from_file'] is None: + parser.error('Keywords is a required argument!') + + # If this argument is present, set the custom output directory + if arguments['output_directory']: + main_directory = arguments['output_directory'] + else: + main_directory = "downloads" + + # Set the delay parameter if this argument is present + if arguments['delay']: + try: + delay_time = int(arguments['delay']) + except ValueError: + parser.error('Delay parameter should be an integer!') + else: + delay_time = 0 + + if arguments['print_urls']: + print_url = 'yes' + else: + print_url = 'no' + + if arguments['print_size']: + print_size = 'yes' + else: + print_size = 'no' + + if arguments['proxy']: + os.environ["http_proxy"] = arguments['proxy'] + os.environ["https_proxy"] = arguments['proxy'] + #Initialization Complete + + if arguments['single_image']: #Download Single Image using a URL + single_image() + else: # or download multiple images based on keywords/keyphrase search + t0 = time.time() # start the timer + errorCount = bulk_download(search_keyword,suffix_keywords,limit,main_directory) + + print("\nEverything downloaded!") + print("Total Errors: " + str(errorCount) + "\n") + t1 = time.time() # stop the timer + total_time = t1 - t0 # Calculating the total time required to crawl, find and download all the links of 60,000 images + print("Total time taken: " + str(total_time) + " Seconds") #--------End of the main program --------# # In[ ]: From 754734c88b9536592e5de9879b8baab1341e9d13 Mon Sep 17 00:00:00 2001 From: Vasa Date: Mon, 26 Mar 2018 14:51:54 -0700 Subject: [PATCH 22/83] doc update on new feature release --- README.rst | 46 +++++++++++++++++++++++++++++++++++++++++++++- setup.py | 2 +- 2 files changed, 46 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index ee63c510..15f6c997 100644 --- a/README.rst +++ b/README.rst @@ -71,6 +71,15 @@ Arguments +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | Argument | Short hand | Description | 
+===================+=============+===============================================================================================================================+ +| config_file | cf | You can pass the arguments inside a config file. This is an alternative to passing arguments on the command line directly. | +| | | | +| | | Please refer to the | +| | | `config file format `__ below | +| | | | +| | | * If 'config_file' argument is present, the program will use the config file and command line arguments will be discarded | +| | | * Config file can only be in **JSON** format | +| | | * Please refrain from passing invalid arguments from config file. Refer to the below arguments list | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | keywords | k | Denotes the keywords/key phrases you want to search for. For more than one keywords, wrap it in single quotes. | | | | | | | | Tips: | @@ -202,10 +211,45 @@ Arguments **Note:** If ``single_image`` or ``url`` parameter is not present, then keywords is a mandatory parameter. No other parameters are mandatory. +Config File Format +================== + +You can either pass the arguments directly from the command line as in the examples below or you can pass it through a config file. Below is a sample of how a config +file looks. + +You can pass more than one record through a config file. The below sample consists of two sets of records. The code will iterate through each of the records and +download images based on the arguments passed. + +.. code:: json + + { + "Records": [ + { + "keywords": "apple", + "limit": 5, + "color": "green", + "print_urls": true + }, + { + "keywords": "universe", + "limit": 15, + "size": "large", + "print_urls": true + } + ] + } + + Examples ======== -- Simple examples +- If you are passing arguments from a config file, simply pass the config_file argument with the name of your JSON file + +.. code-block:: bash + + $ googleimagesdownload -cf example.json + +- Simple example of using keywords and limit arguments .. code-block:: bash diff --git a/setup.py b/setup.py index 904ceaec..36fad4e0 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from codecs import open from os import path -__version__ = '1.1.0' +__version__ = '1.2.0' here = path.abspath(path.dirname(__file__)) From c3a9c2ba51779688988c89c940f12bd525c7f2b4 Mon Sep 17 00:00:00 2001 From: Vasa Date: Mon, 26 Mar 2018 21:59:14 -0700 Subject: [PATCH 23/83] doc update: clarification on suffix keywords --- README.rst | 5 +++-- setup.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index 15f6c997..813f5d31 100644 --- a/README.rst +++ b/README.rst @@ -97,9 +97,10 @@ Arguments +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | suffix_keywords | sk | Denotes additional words added after main keyword while making the search query. | | | | | -| | | Useful when you have multiple suffix keywords for one keyword.
| -| | | | | | | The final search query would be: | +| | | | +| | | So, for example, if the keyword is 'car' and suffix_keyword is 'red,yellow,blue', it will search and download images for | +| | | 'car red', 'car yellow' and 'car blue' individually | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | limit | l | Denotes number of images that you want to download. | | | | | diff --git a/setup.py b/setup.py index 36fad4e0..a028b62b 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from codecs import open from os import path -__version__ = '1.2.0' +__version__ = '1.2.1' here = path.abspath(path.dirname(__file__)) From cc03ef8184222c2e5a42599dca3b0753d67612df Mon Sep 17 00:00:00 2001 From: Vasa Date: Fri, 30 Mar 2018 00:53:20 -0700 Subject: [PATCH 24/83] moved from gechodriver to chromedriver --- README.rst | 27 ++++---- .../google_images_download.py | 68 ++++++++++++------- setup.py | 4 +- 3 files changed, 61 insertions(+), 38 deletions(-) diff --git a/README.rst b/README.rst index 813f5d31..f07fc50f 100644 --- a/README.rst +++ b/README.rst @@ -15,7 +15,7 @@ and optionally download images to your computer. This is a small and ready-to-run program. No dependencies are required to be installed if you would only want to download up to 100 images per keyword. If you would want more than 100 -images per keyword, then you would need to install ``Selenium`` library along with ``geckodriver``. +images per keyword, then you would need to install ``Selenium`` library along with ``chromedriver``. Detailed instructions in the troubleshooting section. @@ -207,6 +207,10 @@ Arguments | | | | | | | This feature can be used to rename files for image identification purpose. | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| chromedriver | cd | With this argument you can pass the path to the 'chromedriver'. | +| | | | +| | | The path looks like this: "path/to/chromedriver". In windows it will be "path/to/chromedriver.exe" | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | help | h | show the help message regarding the usage of the above arguments | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ @@ -398,24 +402,21 @@ On MAC and Linux, when you get permission denied when installing the library usi You can also run pip install as a superuser with ``sudo pip install google_images_download`` but it is not generally a good idea because it can cause issues with your system-level packages. -**## Installing the geckodriver (with Selenium)** +**## Installing the chromedriver (with Selenium)** -If you would want to download more than 100 images per keyword, then you will need to install 'selenium' along with geckodriver. +If you would want to download more than 100 images per keyword, then you will need to install 'selenium' along with 'chromedriver'. -If you have pip installed the library or run the setup.py file, Selenium would have automatically installed on your machine. You will also need Firefox browser on your machine. 
For geckidriver: +If you have pip installed the library or run the setup.py file, Selenium would have automatically installed on your machine. You will also need Chrome browser on your machine. For chromedriver: -`Download the correct geckodriver `__ based on your operating system. Below example shows how to install it for Linux operating system. - -.. code-block:: bash +`Download the correct chromedriver `__ based on your operating system. - $ wget https://github.com/mozilla/geckodriver/releases/download/v0.20.0/geckodriver-v0.20.0-linux64.tar.gz - $ tar -xvzf geckodriver* - $ chmod +x geckodriver - $ export PATH=$PATH:/path-to-extracted-file/geckodriver +On **Windows** or **MAC** if for some reason the chromedriver gives you trouble, download it under the current directory and run the command. -For **Windows** if for some reason the geckodriver gives you trouble, download it under the current directory and run the command. +On **Linux** if you are having issues installing google chrome browser, refer to this `CentOS or Amazon Linux Guide `__ +or `Ubuntu Guide `__ -On **Linux** if you get errors related to ``libmozgtk.so``, refer to this `Link `__ +For **All the operating systems** you will have to use '--chromedriver' or '-cd' argument to specify the path of +chromedriver that you have downloaded in your machine. Structure ========= diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py index a57a94fc..98fab9e9 100644 --- a/google_images_download/google_images_download.py +++ b/google_images_download/google_images_download.py @@ -84,6 +84,7 @@ choices=['Arabic','Chinese (Simplified)','Chinese (Traditional)','Czech','Danish','Dutch','English','Estonian','Finnish','French','German','Greek','Hebrew','Hungarian','Icelandic','Italian','Japanese','Korean','Latvian','Lithuanian','Norwegian','Portuguese','Polish','Romanian','Russian','Spanish','Swedish','Turkish']) parser.add_argument('-pr', '--prefix', default=False, help="A word that you would want to prefix in front of each image name", type=str, required=False) parser.add_argument('-px', '--proxy', help='specify a proxy address and port', type=str, required=False) + parser.add_argument('-cd', '--chromedriver', help='specify the path to chromedriver executable in your local machine', type=str, required=False) args = parser.parse_args() arguments = vars(args) @@ -122,31 +123,52 @@ def download_page(url): # Download Page for more than 100 images def download_extended_page(url): + from selenium import webdriver + from selenium.webdriver.common.keys import Keys + if sys.version_info[0] < 3: + reload(sys) + sys.setdefaultencoding('utf8') + options = webdriver.ChromeOptions() + options.add_argument('--no-sandbox') + options.add_argument("--headless") + try: - from selenium import webdriver - scrolls = 5 - driver = webdriver.Firefox() - driver.get(url) - for scroll in range(scrolls): - for page_scroll in range(10): - driver.execute_script("window.scrollBy(0, 10000)") - time.sleep(0.5) - time.sleep(1) - try: - driver.find_element_by_xpath("//input[@value='Show more results']").click() - except: - print("End of page reached...") - break - version = (3, 0) - cur_version = sys.version_info - if cur_version >= version: # If the Current Version of Python is 3.0 or above - page = driver.page_source - else: #python 2 - page = driver.page_source.encode('utf-8') - driver.quit() - return page + browser = webdriver.Chrome(arguments['chromedriver'], chrome_options=options) except: - return "Page Not 
found" + print("Looks like we cannot locate the path the 'chromedriver'. Please use the '--chromedriver' " + "argument to specify the path to the executable.") + sys.exit() + browser.set_window_size(1024, 768) + + # Open the link + browser.get(url) + time.sleep(1) + print("Getting you a lot of images. This may take a few moments...") + + element = browser.find_element_by_tag_name("body") + # Scroll down + for i in range(30): + element.send_keys(Keys.PAGE_DOWN) + time.sleep(0.3) + + try: + browser.find_element_by_id("smb").click() + for i in range(50): + element.send_keys(Keys.PAGE_DOWN) + time.sleep(0.3) # bot id protection + except: + for i in range(10): + element.send_keys(Keys.PAGE_DOWN) + time.sleep(0.3) # bot id protection + + print("Reached end of Page.") + time.sleep(0.5) + + source = browser.page_source #page source + #close the browser + browser.close() + + return source #Correcting the escape characters for python2 def replace_with_byte(match): diff --git a/setup.py b/setup.py index a028b62b..a0212850 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from codecs import open from os import path -__version__ = '1.2.1' +__version__ = '1.3.1' here = path.abspath(path.dirname(__file__)) @@ -26,7 +26,7 @@ download_url='https://github.com/hardikvasa/google-images-download/tarball/' + __version__, license='MIT', classifiers=[ - 'Development Status :: 3 - Alpha', + 'Development Status :: 4 - Beta', 'Intended Audience :: Developers', 'License :: OSI Approved :: MIT License', 'Programming Language :: Python :: 2.7', From a252a2d24292d99bd5836583845be4a7f8bee576 Mon Sep 17 00:00:00 2001 From: Vasa Date: Fri, 30 Mar 2018 17:05:33 -0700 Subject: [PATCH 25/83] removed unwanted files from docs --- docs/Makefile | 230 --------------------------------- docs/README.rst | 1 + docs/make.bat | 281 ---------------------------------------- docs/source/conf.py | 289 ------------------------------------------ docs/source/index.rst | 17 --- setup.py | 2 +- update_docs.sh | 26 ---- 7 files changed, 2 insertions(+), 844 deletions(-) delete mode 100644 docs/Makefile create mode 100644 docs/README.rst delete mode 100644 docs/make.bat delete mode 100644 docs/source/conf.py delete mode 100644 docs/source/index.rst delete mode 100644 update_docs.sh diff --git a/docs/Makefile b/docs/Makefile deleted file mode 100644 index 09806c60..00000000 --- a/docs/Makefile +++ /dev/null @@ -1,230 +0,0 @@ -# Makefile for Sphinx documentation -# - -# You can set these variables from the command line. -SPHINXOPTS = -SPHINXBUILD = sphinx-build -PAPER = -BUILDDIR = build - -# User-friendly check for sphinx-build -ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) - $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don\'t have Sphinx installed, grab it from http://sphinx-doc.org/) -endif - -# Internal variables. 
-PAPEROPT_a4 = -D latex_paper_size=a4 -PAPEROPT_letter = -D latex_paper_size=letter -ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source -# the i18n builder cannot share the environment and doctrees with the others -I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source - -.PHONY: help -help: - @echo "Please use \`make ' where is one of" - @echo " html to make standalone HTML files" - @echo " dirhtml to make HTML files named index.html in directories" - @echo " singlehtml to make a single large HTML file" - @echo " pickle to make pickle files" - @echo " json to make JSON files" - @echo " htmlhelp to make HTML files and a HTML help project" - @echo " qthelp to make HTML files and a qthelp project" - @echo " applehelp to make an Apple Help Book" - @echo " devhelp to make HTML files and a Devhelp project" - @echo " epub to make an epub" - @echo " epub3 to make an epub3" - @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" - @echo " latexpdf to make LaTeX files and run them through pdflatex" - @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" - @echo " text to make text files" - @echo " man to make manual pages" - @echo " texinfo to make Texinfo files" - @echo " info to make Texinfo files and run them through makeinfo" - @echo " gettext to make PO message catalogs" - @echo " changes to make an overview of all changed/added/deprecated items" - @echo " xml to make Docutils-native XML files" - @echo " pseudoxml to make pseudoxml-XML files for display purposes" - @echo " linkcheck to check all external links for integrity" - @echo " doctest to run all doctests embedded in the documentation (if enabled)" - @echo " coverage to run coverage check of the documentation (if enabled)" - @echo " dummy to check syntax errors of document sources" - -.PHONY: clean -clean: - rm -rf $(BUILDDIR)/* - -.PHONY: html -html: - $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html - @echo - @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." - -.PHONY: dirhtml -dirhtml: - $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml - @echo - @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." - -.PHONY: singlehtml -singlehtml: - $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml - @echo - @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." - -.PHONY: pickle -pickle: - $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle - @echo - @echo "Build finished; now you can process the pickle files." - -.PHONY: json -json: - $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json - @echo - @echo "Build finished; now you can process the JSON files." - -.PHONY: htmlhelp -htmlhelp: - $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp - @echo - @echo "Build finished; now you can run HTML Help Workshop with the" \ - ".hhp project file in $(BUILDDIR)/htmlhelp." - -.PHONY: qthelp -qthelp: - $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp - @echo - @echo "Build finished; now you can run "qcollectiongenerator" with the" \ - ".qhcp project file in $(BUILDDIR)/qthelp, like this:" - @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/twitterpandas.qhcp" - @echo "To view the help file:" - @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/twitterpandas.qhc" - -.PHONY: applehelp -applehelp: - $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp - @echo - @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." - @echo "N.B. 
You won't be able to view it unless you put it in" \ - "~/Library/Documentation/Help or install it in your application" \ - "bundle." - -.PHONY: devhelp -devhelp: - $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp - @echo - @echo "Build finished." - @echo "To view the help file:" - @echo "# mkdir -p $$HOME/.local/share/devhelp/twitterpandas" - @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/twitterpandas" - @echo "# devhelp" - -.PHONY: epub -epub: - $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub - @echo - @echo "Build finished. The epub file is in $(BUILDDIR)/epub." - -.PHONY: epub3 -epub3: - $(SPHINXBUILD) -b epub3 $(ALLSPHINXOPTS) $(BUILDDIR)/epub3 - @echo - @echo "Build finished. The epub3 file is in $(BUILDDIR)/epub3." - -.PHONY: latex -latex: - $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex - @echo - @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." - @echo "Run \`make' in that directory to run these through (pdf)latex" \ - "(use \`make latexpdf' here to do that automatically)." - -.PHONY: latexpdf -latexpdf: - $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex - @echo "Running LaTeX files through pdflatex..." - $(MAKE) -C $(BUILDDIR)/latex all-pdf - @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." - -.PHONY: latexpdfja -latexpdfja: - $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex - @echo "Running LaTeX files through platex and dvipdfmx..." - $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja - @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." - -.PHONY: text -text: - $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text - @echo - @echo "Build finished. The text files are in $(BUILDDIR)/text." - -.PHONY: man -man: - $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man - @echo - @echo "Build finished. The manual pages are in $(BUILDDIR)/man." - -.PHONY: texinfo -texinfo: - $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo - @echo - @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." - @echo "Run \`make' in that directory to run these through makeinfo" \ - "(use \`make info' here to do that automatically)." - -.PHONY: info -info: - $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo - @echo "Running Texinfo files through makeinfo..." - make -C $(BUILDDIR)/texinfo info - @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." - -.PHONY: gettext -gettext: - $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale - @echo - @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." - -.PHONY: changes -changes: - $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes - @echo - @echo "The overview file is in $(BUILDDIR)/changes." - -.PHONY: linkcheck -linkcheck: - $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck - @echo - @echo "Link check complete; look for any errors in the above output " \ - "or in $(BUILDDIR)/linkcheck/output.txt." - -.PHONY: doctest -doctest: - $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest - @echo "Testing of doctests in the sources finished, look at the " \ - "results in $(BUILDDIR)/doctest/output.txt." - -.PHONY: coverage -coverage: - $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage - @echo "Testing of coverage in the sources finished, look at the " \ - "results in $(BUILDDIR)/coverage/python.txt." - -.PHONY: xml -xml: - $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml - @echo - @echo "Build finished. 
The XML files are in $(BUILDDIR)/xml." - -.PHONY: pseudoxml -pseudoxml: - $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml - @echo - @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." - -.PHONY: dummy -dummy: - $(SPHINXBUILD) -b dummy $(ALLSPHINXOPTS) $(BUILDDIR)/dummy - @echo - @echo "Build finished. Dummy builder generates no files." diff --git a/docs/README.rst b/docs/README.rst new file mode 100644 index 00000000..916c316f --- /dev/null +++ b/docs/README.rst @@ -0,0 +1 @@ +Documents coming soon! \ No newline at end of file diff --git a/docs/make.bat b/docs/make.bat deleted file mode 100644 index 509abe3c..00000000 --- a/docs/make.bat +++ /dev/null @@ -1,281 +0,0 @@ -@ECHO OFF - -REM Command file for Sphinx documentation - -if "%SPHINXBUILD%" == "" ( - set SPHINXBUILD=sphinx-build -) -set BUILDDIR=build -set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% source -set I18NSPHINXOPTS=%SPHINXOPTS% source -if NOT "%PAPER%" == "" ( - set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% - set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% -) - -if "%1" == "" goto help - -if "%1" == "help" ( - :help - echo.Please use `make ^` where ^ is one of - echo. html to make standalone HTML files - echo. dirhtml to make HTML files named index.html in directories - echo. singlehtml to make a single large HTML file - echo. pickle to make pickle files - echo. json to make JSON files - echo. htmlhelp to make HTML files and a HTML help project - echo. qthelp to make HTML files and a qthelp project - echo. devhelp to make HTML files and a Devhelp project - echo. epub to make an epub - echo. epub3 to make an epub3 - echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter - echo. text to make text files - echo. man to make manual pages - echo. texinfo to make Texinfo files - echo. gettext to make PO message catalogs - echo. changes to make an overview over all changed/added/deprecated items - echo. xml to make Docutils-native XML files - echo. pseudoxml to make pseudoxml-XML files for display purposes - echo. linkcheck to check all external links for integrity - echo. doctest to run all doctests embedded in the documentation if enabled - echo. coverage to run coverage check of the documentation if enabled - echo. dummy to check syntax errors of document sources - goto end -) - -if "%1" == "clean" ( - for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i - del /q /s %BUILDDIR%\* - goto end -) - - -REM Check if sphinx-build is available and fallback to Python version if any -%SPHINXBUILD% 1>NUL 2>NUL -if errorlevel 9009 goto sphinx_python -goto sphinx_ok - -:sphinx_python - -set SPHINXBUILD=python -m sphinx.__init__ -%SPHINXBUILD% 2> nul -if errorlevel 9009 ( - echo. - echo.The 'sphinx-build' command was not found. Make sure you have Sphinx - echo.installed, then set the SPHINXBUILD environment variable to point - echo.to the full path of the 'sphinx-build' executable. Alternatively you - echo.may add the Sphinx directory to PATH. - echo. - echo.If you don't have Sphinx installed, grab it from - echo.http://sphinx-doc.org/ - exit /b 1 -) - -:sphinx_ok - - -if "%1" == "html" ( - %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The HTML pages are in %BUILDDIR%/html. - goto end -) - -if "%1" == "dirhtml" ( - %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 
- goto end -) - -if "%1" == "singlehtml" ( - %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. - goto end -) - -if "%1" == "pickle" ( - %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle - if errorlevel 1 exit /b 1 - echo. - echo.Build finished; now you can process the pickle files. - goto end -) - -if "%1" == "json" ( - %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json - if errorlevel 1 exit /b 1 - echo. - echo.Build finished; now you can process the JSON files. - goto end -) - -if "%1" == "htmlhelp" ( - %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp - if errorlevel 1 exit /b 1 - echo. - echo.Build finished; now you can run HTML Help Workshop with the ^ -.hhp project file in %BUILDDIR%/htmlhelp. - goto end -) - -if "%1" == "qthelp" ( - %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp - if errorlevel 1 exit /b 1 - echo. - echo.Build finished; now you can run "qcollectiongenerator" with the ^ -.qhcp project file in %BUILDDIR%/qthelp, like this: - echo.^> qcollectiongenerator %BUILDDIR%\qthelp\twitterpandas.qhcp - echo.To view the help file: - echo.^> assistant -collectionFile %BUILDDIR%\qthelp\twitterpandas.ghc - goto end -) - -if "%1" == "devhelp" ( - %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. - goto end -) - -if "%1" == "epub" ( - %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The epub file is in %BUILDDIR%/epub. - goto end -) - -if "%1" == "epub3" ( - %SPHINXBUILD% -b epub3 %ALLSPHINXOPTS% %BUILDDIR%/epub3 - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The epub3 file is in %BUILDDIR%/epub3. - goto end -) - -if "%1" == "latex" ( - %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex - if errorlevel 1 exit /b 1 - echo. - echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. - goto end -) - -if "%1" == "latexpdf" ( - %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex - cd %BUILDDIR%/latex - make all-pdf - cd %~dp0 - echo. - echo.Build finished; the PDF files are in %BUILDDIR%/latex. - goto end -) - -if "%1" == "latexpdfja" ( - %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex - cd %BUILDDIR%/latex - make all-pdf-ja - cd %~dp0 - echo. - echo.Build finished; the PDF files are in %BUILDDIR%/latex. - goto end -) - -if "%1" == "text" ( - %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The text files are in %BUILDDIR%/text. - goto end -) - -if "%1" == "man" ( - %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The manual pages are in %BUILDDIR%/man. - goto end -) - -if "%1" == "texinfo" ( - %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. - goto end -) - -if "%1" == "gettext" ( - %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The message catalogs are in %BUILDDIR%/locale. - goto end -) - -if "%1" == "changes" ( - %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes - if errorlevel 1 exit /b 1 - echo. - echo.The overview file is in %BUILDDIR%/changes. 
- goto end -) - -if "%1" == "linkcheck" ( - %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck - if errorlevel 1 exit /b 1 - echo. - echo.Link check complete; look for any errors in the above output ^ -or in %BUILDDIR%/linkcheck/output.txt. - goto end -) - -if "%1" == "doctest" ( - %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest - if errorlevel 1 exit /b 1 - echo. - echo.Testing of doctests in the sources finished, look at the ^ -results in %BUILDDIR%/doctest/output.txt. - goto end -) - -if "%1" == "coverage" ( - %SPHINXBUILD% -b coverage %ALLSPHINXOPTS% %BUILDDIR%/coverage - if errorlevel 1 exit /b 1 - echo. - echo.Testing of coverage in the sources finished, look at the ^ -results in %BUILDDIR%/coverage/python.txt. - goto end -) - -if "%1" == "xml" ( - %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The XML files are in %BUILDDIR%/xml. - goto end -) - -if "%1" == "pseudoxml" ( - %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. - goto end -) - -if "%1" == "dummy" ( - %SPHINXBUILD% -b dummy %ALLSPHINXOPTS% %BUILDDIR%/dummy - if errorlevel 1 exit /b 1 - echo. - echo.Build finished. Dummy builder generates no files. - goto end -) - -:end diff --git a/docs/source/conf.py b/docs/source/conf.py deleted file mode 100644 index 9d43f086..00000000 --- a/docs/source/conf.py +++ /dev/null @@ -1,289 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# -# google-images-download documentation build configuration file, created by -# cookiecutter pipproject -# -# This file is execfile()d with the current directory set to its -# containing dir. -# -# Note that not all possible configuration values are present in this -# autogenerated file. -# -# All configuration values have a default; values that are commented out -# serve to show the default. - -import sys -import os - -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -sys.path.insert(0, os.path.abspath('../..')) - -# -- General configuration ------------------------------------------------ - -# If your documentation needs a minimal Sphinx version, state it here. -#needs_sphinx = '1.0' - -# Add any Sphinx extension module names here, as strings. They can be -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom -# ones. -extensions = [ - 'sphinx.ext.autodoc', -] - -# Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] - -# The suffix(es) of source filenames. -# You can specify multiple suffix as a list of string: -# source_suffix = ['.rst', '.md'] -source_suffix = '.rst' - -# The encoding of source files. -#source_encoding = 'utf-8-sig' - -# The master toctree document. -master_doc = 'index' - -# General information about the project. -project = 'google-images-download' -copyright = '2016, Hardik Vasa' -author = 'Hardik Vasa' - -# The version info for the project you're documenting, acts as replacement for -# |version| and |release|, also used in various other places throughout the -# built documents. -# -# The short X.Y version. -version = '1.0.0' -# The full version, including alpha/beta/rc tags. -release = '1.0.0' - -# The language for content autogenerated by Sphinx. 
Refer to documentation -# for a list of supported languages. -# -# This is also used if you do content translation via gettext catalogs. -# Usually you set "language" from the command line for these cases. -language = None - -# There are two options for replacing |today|: either, you set today to some -# non-false value, then it is used: -#today = '' -# Else, today_fmt is used as the format for a strftime call. -#today_fmt = '%B %d, %Y' - -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -# This patterns also effect to html_static_path and html_extra_path -exclude_patterns = [] - -# The reST default role (used for this markup: `text`) to use for all -# documents. -#default_role = None - -# If true, '()' will be appended to :func: etc. cross-reference text. -#add_function_parentheses = True - -# If true, the current module name will be prepended to all description -# unit titles (such as .. function::). -#add_module_names = True - -# If true, sectionauthor and moduleauthor directives will be shown in the -# output. They are ignored by default. -#show_authors = False - -# The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' - -# A list of ignored prefixes for module index sorting. -#modindex_common_prefix = [] - -# If true, keep warnings as "system message" paragraphs in the built documents. -#keep_warnings = False - -# If true, `todo` and `todoList` produce output, else they produce nothing. -todo_include_todos = False - - -# -- Options for HTML output ---------------------------------------------- - -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. -html_theme = 'sphinx_rtd_theme' - -# Theme options are theme-specific and customize the look and feel of a theme -# further. For a list of options available for each theme, see the -# documentation. -#html_theme_options = {} - -# Add any paths that contain custom themes here, relative to this directory. -#html_theme_path = [] - -# The name for this set of Sphinx documents. -# " v documentation" by default. -#html_title = 'google-images-download v1.0.0' - -# A shorter title for the navigation bar. Default is the same as html_title. -#html_short_title = None - -# The name of an image file (relative to this directory) to place at the top -# of the sidebar. -#html_logo = None - -# The name of an image file (relative to this directory) to use as a favicon of -# the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 -# pixels large. -#html_favicon = None - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] - -# Add any extra paths that contain custom files (such as robots.txt or -# .htaccess) here, relative to this directory. These files are copied -# directly to the root of the documentation. -#html_extra_path = [] - -# If not None, a 'Last updated on:' timestamp is inserted at every page -# bottom, using the given strftime format. -# The empty string is equivalent to '%b %d, %Y'. -#html_last_updated_fmt = None - -# If true, SmartyPants will be used to convert quotes and dashes to -# typographically correct entities. -#html_use_smartypants = True - -# Custom sidebar templates, maps document names to template names. 
-#html_sidebars = {} - -# Additional templates that should be rendered to pages, maps page names to -# template names. -#html_additional_pages = {} - -# If false, no module index is generated. -#html_domain_indices = True - -# If false, no index is generated. -#html_use_index = True - -# If true, the index is split into individual pages for each letter. -#html_split_index = False - -# If true, links to the reST sources are added to the pages. -#html_show_sourcelink = True - -# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. -#html_show_sphinx = True - -# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. -#html_show_copyright = True - -# If true, an OpenSearch description file will be output, and all pages will -# contain a tag referring to it. The value of this option must be the -# base URL from which the finished HTML is served. -#html_use_opensearch = '' - -# This is the file name suffix for HTML files (e.g. ".xhtml"). -#html_file_suffix = None - -# Language to be used for generating the HTML full-text search index. -# Sphinx supports the following languages: -# 'da', 'de', 'en', 'es', 'fi', 'fr', 'h', 'it', 'ja' -# 'nl', 'no', 'pt', 'ro', 'r', 'sv', 'tr', 'zh' -#html_search_language = 'en' - -# A dictionary with options for the search language support, empty by default. -# 'ja' uses this config value. -# 'zh' user can custom change `jieba` dictionary path. -#html_search_options = {'type': 'default'} - -# The name of a javascript file (relative to the configuration directory) that -# implements a search results scorer. If empty, the default will be used. -#html_search_scorer = 'scorer.js' - -# Output file base name for HTML help builder. -htmlhelp_basename = 'google-images-downloaddoc' - -# -- Options for LaTeX output --------------------------------------------- - -latex_elements = { -# The paper size ('letterpaper' or 'a4paper'). -#'papersize': 'letterpaper', - -# The font size ('10pt', '11pt' or '12pt'). -#'pointsize': '10pt', - -# Additional stuff for the LaTeX preamble. -#'preamble': '', - -# Latex figure (float) alignment -#'figure_align': 'htbp', -} - -# Grouping the document tree into LaTeX files. List of tuples -# (source start file, target name, title, -# author, documentclass [howto, manual, or own class]). -latex_documents = [ - (master_doc, 'google-images-download.tex', 'google-images-download Documentation', - 'Hardik Vasa', 'manual'), -] - -# The name of an image file (relative to this directory) to place at the top of -# the title page. -#latex_logo = None - -# For "manual" documents, if this is true, then toplevel headings are parts, -# not chapters. -#latex_use_parts = False - -# If true, show page references after internal links. -#latex_show_pagerefs = False - -# If true, show URL addresses after external links. -#latex_show_urls = False - -# Documents to append as an appendix to all manuals. -#latex_appendices = [] - -# If false, no module index is generated. -#latex_domain_indices = True - - -# -- Options for manual page output --------------------------------------- - -# One entry per manual page. List of tuples -# (source start file, name, description, authors, manual section). -man_pages = [ - (master_doc, 'google-images-download', 'google-images-download Documentation', - [author], 1) -] - -# If true, show URL addresses after external links. -#man_show_urls = False - - -# -- Options for Texinfo output ------------------------------------------- - -# Grouping the document tree into Texinfo files. 
List of tuples -# (source start file, target name, title, author, -# dir menu entry, description, category) -texinfo_documents = [ - (master_doc, 'google-images-download', 'google-images-download Documentation', - author, 'google-images-download', 'One line description of project.', - 'Miscellaneous'), -] - -# Documents to append as an appendix to all manuals. -#texinfo_appendices = [] - -# If false, no module index is generated. -#texinfo_domain_indices = True - -# How to display URL addresses: 'footnote', 'no', or 'inline'. -#texinfo_show_urls = 'footnote' - -# If true, do not generate a @detailmenu in the "Top" node's menu. -#texinfo_no_detailmenu = False diff --git a/docs/source/index.rst b/docs/source/index.rst deleted file mode 100644 index 4cffb875..00000000 --- a/docs/source/index.rst +++ /dev/null @@ -1,17 +0,0 @@ -Welcome to google-images-download's documentation! -========================================= - -Contents: - -.. toctree:: - :maxdepth: 2 - - - -Indices and tables -================== - -* :ref:`genindex` -* :ref:`modindex` -* :ref:`search` - diff --git a/setup.py b/setup.py index a0212850..9198bf3a 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from codecs import open from os import path -__version__ = '1.3.1' +__version__ = '1.3.2' here = path.abspath(path.dirname(__file__)) diff --git a/update_docs.sh b/update_docs.sh deleted file mode 100644 index a092a3a4..00000000 --- a/update_docs.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/env bash - -# build the docs -cd docs -make clean -make html -cd .. - -# commit and push -git add -A -git commit -m "building and pushing docs" -git push origin master - -# switch branches and pull the data we want -git checkout gh-pages -rm -rf . -touch .nojekyll -git checkout master docs/build/html -mv ./docs/build/html/* ./ -rm -rf ./docs -git add -A -git commit -m "publishing updated docs..." -git push origin gh-pages - -# switch back -git checkout master \ No newline at end of file From 655a7fa1eb21d1a60443dd9ff4ca351d7a26148a Mon Sep 17 00:00:00 2001 From: Vasa Date: Sat, 31 Mar 2018 14:18:12 -0700 Subject: [PATCH 26/83] feature to download all the relevant images to the keyword provided --- README.rst | 19 +++++++ .../google_images_download.py | 55 ++++++++++++++++++- setup.py | 2 +- 3 files changed, 73 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index f07fc50f..494095c6 100644 --- a/README.rst +++ b/README.rst @@ -108,6 +108,15 @@ Arguments | | | | | | | If this value is not specified, it defaults to 100. | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| related_images | ri | This argument downloads a ton of images related to the keyword you provided. | +| | | | +| | | Google Images page returns list of related keywords to the keyword you have mentioned in the query. This tool downloads | +| | | images from each of those related keywords based on the limit you have mentioned in your query | +| | | | +| | | This argument does not take any value. Just add '--related_images' or '-ri' in your query. | +| | | | +| | | **Note:** This argument can download hundreds or thousands of additional images so please use this carefully. | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | format | f | Denotes the format/extension of the image that you want to download. 
| | | | | | | | `Possible values: jpg, gif, png, bmp, svg, webp, ico` | @@ -176,18 +185,26 @@ Arguments | specific_site | ss | Allows you to download images with keywords only from a specific website/domain name you mention. | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | print_urls | p | Print the URLs of the images on the console. These image URLs can be used for debugging purposes | +| | | | +| | | This argument does not take any value. Just add '--print_urls' or '-p' in your query. | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | print_size | ps | Prints the size of the images on the console | | | | | | | | The size denoted the actual size of the image and not the size of the image on disk | +| | | | +| | | This argument does not take any value. Just add '--print_size' or '-ps' in your query. | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | metadata | m | Prints the metada of the image on the console. | | | | | | | | This includes image size, origin, image attributes, description, image URL, etc. | +| | | | +| | | This argument does not take any value. Just add '--metadata' or '-m' in your query. | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | extract_metadata | e | This option allows you to save metadata of all the downloaded images in a text file. | | | | | | | | This file can be found in the ``logs/`` directory. The name of the file would be same as the keyword nam | +| | | | +| | | This argument does not take any value. Just add '--extract_metadata' or '-e' in your query. | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | socket_timeout | st | Allows you to specify the time to wait for socket connection. | | | | | @@ -196,6 +213,8 @@ Arguments | thumbnail | th | Downloads image thumbnails corresponding to each image downloaded. | | | | | | | | Thumbnails are saved in their own sub-directories inside of the main directory. | +| | | | +| | | This argument does not take any value. Just add '--thumbnail' or '-th' in your query. | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | language | la | Defines the language filter. 
The search results are automatically returned in that language | | | | | diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py index 98fab9e9..71890e59 100644 --- a/google_images_download/google_images_download.py +++ b/google_images_download/google_images_download.py @@ -85,6 +85,7 @@ parser.add_argument('-pr', '--prefix', default=False, help="A word that you would want to prefix in front of each image name", type=str, required=False) parser.add_argument('-px', '--proxy', help='specify a proxy address and port', type=str, required=False) parser.add_argument('-cd', '--chromedriver', help='specify the path to chromedriver executable in your local machine', type=str, required=False) + parser.add_argument('-ri', '--related_images', default=False, help="Downloads images that are similar to the keyword provided", action="store_true") args = parser.parse_args() arguments = vars(args) @@ -135,8 +136,9 @@ def download_extended_page(url): try: browser = webdriver.Chrome(arguments['chromedriver'], chrome_options=options) except: - print("Looks like we cannot locate the path the 'chromedriver'. Please use the '--chromedriver' " - "argument to specify the path to the executable.") + print("Looks like we cannot locate the path the 'chromedriver' (use the '--chromedriver' " + "argument to specify the path to the executable.) or google chrome browser is not " + "installed on your machine") sys.exit() browser.set_window_size(1024, 768) @@ -178,6 +180,41 @@ def repair(brokenjson): invalid_escape = re.compile(r'\\[0-7]{1,3}') # up to 3 digits for byte values up to FF return invalid_escape.sub(replace_with_byte, brokenjson) +# Finding 'Next Image' from the given raw page +def get_next_tab(s): + start_line = s.find('class="ZO5Spb"') + if start_line == -1: # If no links are found then give an error! 
+ end_quote = 0 + link = "no_tabs" + return link,'',end_quote + else: + start_line = s.find('class="ZO5Spb"') + start_content = s.find('href="', start_line + 1) + end_content = s.find('">', start_content + 1) + url_item = "https://www.google.com" + str(s[start_content+6:end_content]) + url_item = url_item.replace('&', '&') + + start_line_2 = s.find('class="ZO5Spb"') + start_content_2 = s.find(':', start_line_2 + 1) + end_content_2 = s.find('"', start_content_2 + 1) + url_item_name = str(s[start_content_2 + 1:end_content_2]) + + #print(url_item,url_item_name) + return url_item,url_item_name,end_content + + +# Getting all links with the help of '_images_get_next_image' +def get_all_tabs(page): + tabs = {} + while True: + item,item_name,end_content = get_next_tab(page) + if item == "no_tabs": + break + else: + tabs[item_name] = item # Append all the links in the list named 'Links' + time.sleep(0.1) # Timer could be used to slow down the request for image downloads + page = page[end_content:] + return tabs #Format the object in readable format def format_object(object): @@ -613,6 +650,20 @@ def bulk_download(search_keyword,suffix_keywords,limit,main_directory): text_file.write(json.dumps(items, indent=4, sort_keys=True)) text_file.close() + #Related images + if arguments['related_images']: + print("\nGetting list of related keywords...this may take a few moments") + tabs = get_all_tabs(raw_html) + for key, value in tabs.items(): + final_search_term = (search_term + " - " + key) + print("\nNow Downloading - " + final_search_term) + if limit < 101: + new_raw_html = download_page(value) # download page + else: + new_raw_html = download_extended_page(value) + create_directories(main_directory, final_search_term) + _get_all_items(new_raw_html, main_directory, search_term + " - " + key, limit) + i += 1 return errorCount diff --git a/setup.py b/setup.py index 9198bf3a..d37331b3 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from codecs import open from os import path -__version__ = '1.3.2' +__version__ = '1.4.2' here = path.abspath(path.dirname(__file__)) From d1b675a6c62be6828ade1541b523b9ae61446200 Mon Sep 17 00:00:00 2001 From: Vasa Date: Sat, 31 Mar 2018 19:46:40 -0700 Subject: [PATCH 27/83] added prefix keywords feature --- README.rst | 7 ++ .../google_images_download.py | 115 ++++++++++-------- setup.py | 2 +- 3 files changed, 71 insertions(+), 53 deletions(-) diff --git a/README.rst b/README.rst index 494095c6..a41a8339 100644 --- a/README.rst +++ b/README.rst @@ -95,6 +95,13 @@ Arguments | | | | | | | Only file types '.txt' or '.csv' are allowed. | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| prefix_keywords | pk | Denotes additional words added before main keyword while making the search query. | +| | | | +| | | The final search query would be: | +| | | | +| | | So, for example, if the keyword is 'car' and prefix_keyword is 'red,yellow,blue', it will search and download images for | +| | | 'red car', 'yellow car' and 'blue car' individually | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | suffix_keywords | sk | Denotes additional words added after main keyword while making the search query. 
| | | | | | | | The final search query would be: | diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py index 71890e59..549ae387 100644 --- a/google_images_download/google_images_download.py +++ b/google_images_download/google_images_download.py @@ -49,7 +49,8 @@ parser = argparse.ArgumentParser() parser.add_argument('-k', '--keywords', help='delimited list input', type=str, required=False) parser.add_argument('-kf', '--keywords_from_file', help='extract list of keywords from a text file', type=str, required=False) - parser.add_argument('-sk', '--suffix_keywords', help='comma separated additional words added to main keyword', type=str, required=False) + parser.add_argument('-sk', '--suffix_keywords', help='comma separated additional words added after to main keyword', type=str, required=False) + parser.add_argument('-pk', '--prefix_keywords', help='comma separated additional words added before main keyword', type=str, required=False) parser.add_argument('-l', '--limit', help='delimited list input', type=str, required=False) parser.add_argument('-f', '--format', help='download images with specific format', type=str, required=False, choices=['jpg', 'gif', 'png', 'bmp', 'svg', 'webp', 'ico']) @@ -611,60 +612,64 @@ def _get_all_items(page,main_directory,dir_name,limit): # Bulk Download -def bulk_download(search_keyword,suffix_keywords,limit,main_directory): +def bulk_download(search_keyword,suffix_keywords,prefix_keywords,limit,main_directory): # appending a dummy value to Suffix Keywords array if it is blank if len(suffix_keywords) == 0: suffix_keywords.append('') - for sky in suffix_keywords: # 1.for every suffix keywords - i = 0 - while i < len(search_keyword): # 2.for every main keyword - iteration = "\n" + "Item no.: " + str(i + 1) + " -->" + " Item name = " + str(search_keyword[i] + str(sky)) - print(iteration) - print("Evaluating...") - search_term = search_keyword[i] + sky - dir_name = search_term + ('-' + arguments['color'] if arguments['color'] else '') #sub-directory - - create_directories(main_directory,dir_name) #create directories in OS - - params = build_url_parameters() #building URL with params - - url = build_search_url(search_term,params) #building main search url - - if limit < 101: - raw_html = download_page(url) # download page - else: - raw_html = download_extended_page(url) - - print("Starting Download...") - items,errorCount = _get_all_items(raw_html,main_directory,dir_name,limit) #get all image items and download images - - #dumps into a text file - if arguments['extract_metadata']: - try: - if not os.path.exists("logs"): - os.makedirs("logs") - except OSError as e: - print(e) - text_file = open("logs/"+search_keyword[i]+".txt", "w") - text_file.write(json.dumps(items, indent=4, sort_keys=True)) - text_file.close() - - #Related images - if arguments['related_images']: - print("\nGetting list of related keywords...this may take a few moments") - tabs = get_all_tabs(raw_html) - for key, value in tabs.items(): - final_search_term = (search_term + " - " + key) - print("\nNow Downloading - " + final_search_term) - if limit < 101: - new_raw_html = download_page(value) # download page - else: - new_raw_html = download_extended_page(value) - create_directories(main_directory, final_search_term) - _get_all_items(new_raw_html, main_directory, search_term + " - " + key, limit) - - i += 1 + if len(prefix_keywords) == 0: + prefix_keywords.append('') + + for pky in prefix_keywords: + for sky in suffix_keywords: # 1.for 
every suffix keywords + i = 0 + while i < len(search_keyword): # 2.for every main keyword + iteration = "\n" + "Item no.: " + str(i + 1) + " -->" + " Item name = " + str(pky) + str(search_keyword[i] + str(sky)) + print(iteration) + print("Evaluating...") + search_term = pky + search_keyword[i] + sky + dir_name = search_term + ('-' + arguments['color'] if arguments['color'] else '') #sub-directory + + create_directories(main_directory,dir_name) #create directories in OS + + params = build_url_parameters() #building URL with params + + url = build_search_url(search_term,params) #building main search url + + if limit < 101: + raw_html = download_page(url) # download page + else: + raw_html = download_extended_page(url) + + print("Starting Download...") + items,errorCount = _get_all_items(raw_html,main_directory,dir_name,limit) #get all image items and download images + + #dumps into a text file + if arguments['extract_metadata']: + try: + if not os.path.exists("logs"): + os.makedirs("logs") + except OSError as e: + print(e) + text_file = open("logs/"+search_keyword[i]+".txt", "w") + text_file.write(json.dumps(items, indent=4, sort_keys=True)) + text_file.close() + + #Related images + if arguments['related_images']: + print("\nGetting list of related keywords...this may take a few moments") + tabs = get_all_tabs(raw_html) + for key, value in tabs.items(): + final_search_term = (search_term + " - " + key) + print("\nNow Downloading - " + final_search_term) + if limit < 101: + new_raw_html = download_page(value) # download page + else: + new_raw_html = download_extended_page(value) + create_directories(main_directory, final_search_term) + _get_all_items(new_raw_html, main_directory, search_term + " - " + key, limit) + + i += 1 return errorCount #------------- Main Program -------------# @@ -709,6 +714,12 @@ def bulk_download(search_keyword,suffix_keywords,limit,main_directory): else: suffix_keywords = [] + # Additional words added to keywords + if arguments['prefix_keywords']: + prefix_keywords = [str(sk) + " " for sk in arguments['prefix_keywords'].split(',')] + else: + prefix_keywords = [] + # Setting limit on number of images to be downloaded if arguments['limit']: limit = int(arguments['limit']) @@ -761,7 +772,7 @@ def bulk_download(search_keyword,suffix_keywords,limit,main_directory): single_image() else: # or download multiple images based on keywords/keyphrase search t0 = time.time() # start the timer - errorCount = bulk_download(search_keyword,suffix_keywords,limit,main_directory) + errorCount = bulk_download(search_keyword,suffix_keywords,prefix_keywords,limit,main_directory) print("\nEverything downloaded!") print("Total Errors: " + str(errorCount) + "\n") diff --git a/setup.py b/setup.py index d37331b3..0f7667e8 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from codecs import open from os import path -__version__ = '1.4.2' +__version__ = '1.4.3' here = path.abspath(path.dirname(__file__)) From 6ac195a401cf40b08678f1125bc17954d78f405d Mon Sep 17 00:00:00 2001 From: Vasa Date: Mon, 2 Apr 2018 11:02:49 -0700 Subject: [PATCH 28/83] corrected typos related to 'usage rights' filter --- README.rst | 10 +++++----- google_images_download/google_images_download.py | 4 ++-- setup.py | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/README.rst b/README.rst index a41a8339..525b454d 100644 --- a/README.rst +++ b/README.rst @@ -140,10 +140,10 @@ Arguments | | | | | | | `Possible values:` | | | | | -| | | * `labled-for-reuse-with-modifications`, | -| | | * 
`labled-for-reuse`, | -| | | * `labled-for-noncommercial-reuse-with-modification`, | -| | | * `labled-for-nocommercial-reuse` | +| | | * `labeled-for-reuse-with-modifications`, | +| | | * `labeled-for-reuse`, | +| | | * `labeled-for-noncommercial-reuse-with-modification`, | +| | | * `labeled-for-nocommercial-reuse` | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | size | s | Denotes the relative size of the image to be downloaded. | | | | | @@ -347,7 +347,7 @@ Examples .. code-block:: bash - $ googleimagesdownload --keywords "universe" --usage_rights labled-for-reuse + $ googleimagesdownload --keywords "universe" --usage_rights labeled-for-reuse - To download images with specific color type diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py index 549ae387..b5580179 100644 --- a/google_images_download/google_images_download.py +++ b/google_images_download/google_images_download.py @@ -63,7 +63,7 @@ parser.add_argument('-ct', '--color_type', help='filter on color', type=str, required=False, choices=['full-color', 'black-and-white', 'transparent']) parser.add_argument('-r', '--usage_rights', help='usage rights', type=str, required=False, - choices=['labled-for-reuse-with-modifications','labled-for-reuse','labled-for-noncommercial-reuse-with-modification','labled-for-nocommercial-reuse']) + choices=['labeled-for-reuse-with-modifications','labeled-for-reuse','labeled-for-noncommercial-reuse-with-modification','labeled-for-nocommercial-reuse']) parser.add_argument('-s', '--size', help='image size', type=str, required=False, choices=['large','medium','icon','>400*300','>640*480','>800*600','>1024*768','>2MP','>4MP','>6MP','>8MP','>10MP','>12MP','>15MP','>20MP','>40MP','>70MP']) parser.add_argument('-t', '--type', help='image type', type=str, required=False, @@ -328,7 +328,7 @@ def build_url_parameters(): counter = 0 params = {'color':[arguments['color'],{'red':'ic:specific,isc:red', 'orange':'ic:specific,isc:orange', 'yellow':'ic:specific,isc:yellow', 'green':'ic:specific,isc:green', 'teal':'ic:specific,isc:teel', 'blue':'ic:specific,isc:blue', 'purple':'ic:specific,isc:purple', 'pink':'ic:specific,isc:pink', 'white':'ic:specific,isc:white', 'gray':'ic:specific,isc:gray', 'black':'ic:specific,isc:black', 'brown':'ic:specific,isc:brown'}], 'color_type':[arguments['color_type'],{'full-color':'ic:color', 'black-and-white':'ic:gray','transparent':'ic:trans'}], - 'usage_rights':[arguments['usage_rights'],{'labled-for-reuse-with-modifications':'sur:fmc','labled-for-reuse':'sur:fc','labled-for-noncommercial-reuse-with-modification':'sur:fm','labled-for-nocommercial-reuse':'sur:f'}], + 'usage_rights':[arguments['usage_rights'],{'labeled-for-reuse-with-modifications':'sur:fmc','labeled-for-reuse':'sur:fc','labeled-for-noncommercial-reuse-with-modification':'sur:fm','labeled-for-nocommercial-reuse':'sur:f'}], 'size':[arguments['size'],{'large':'isz:l','medium':'isz:m','icon':'isz:i','>400*300':'isz:lt,islt:qsvga','>640*480':'isz:lt,islt:vga','>800*600':'isz:lt,islt:svga','>1024*768':'visz:lt,islt:xga','>2MP':'isz:lt,islt:2mp','>4MP':'isz:lt,islt:4mp','>6MP':'isz:lt,islt:6mp','>8MP':'isz:lt,islt:8mp','>10MP':'isz:lt,islt:10mp','>12MP':'isz:lt,islt:12mp','>15MP':'isz:lt,islt:15mp','>20MP':'isz:lt,islt:20mp','>40MP':'isz:lt,islt:40mp','>70MP':'isz:lt,islt:70mp'}], 
'type':[arguments['type'],{'face':'itp:face','photo':'itp:photo','clip-art':'itp:clip-art','line-drawing':'itp:lineart','animated':'itp:animated'}], 'time':[arguments['time'],{'past-24-hours':'qdr:d','past-7-days':'qdr:w'}], diff --git a/setup.py b/setup.py index 0f7667e8..3f5f1870 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from codecs import open from os import path -__version__ = '1.4.3' +__version__ = '1.4.4' here = path.abspath(path.dirname(__file__)) From 11a2b4bf4b58d1afc92bfee9be581d9f3f785ec3 Mon Sep 17 00:00:00 2001 From: Vasa Date: Mon, 2 Apr 2018 11:04:01 -0700 Subject: [PATCH 29/83] corrected typos related to 'usage rights' filter --- README.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.rst b/README.rst index 525b454d..c25b2397 100644 --- a/README.rst +++ b/README.rst @@ -140,10 +140,10 @@ Arguments | | | | | | | `Possible values:` | | | | | -| | | * `labeled-for-reuse-with-modifications`, | -| | | * `labeled-for-reuse`, | -| | | * `labeled-for-noncommercial-reuse-with-modification`, | -| | | * `labeled-for-nocommercial-reuse` | +| | | * `labeled-for-reuse-with-modifications`, | +| | | * `labeled-for-reuse`, | +| | | * `labeled-for-noncommercial-reuse-with-modification`, | +| | | * `labeled-for-nocommercial-reuse` | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | size | s | Denotes the relative size of the image to be downloaded. | | | | | From f6981497cce945fca116dab43a9e632f2cbe95d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eva=20=C5=A0mij=C3=A1kov=C3=A1?= Date: Mon, 2 Apr 2018 23:28:12 +0200 Subject: [PATCH 30/83] Fix missing None arguments for python3 (#58) --- google_images_download/google_images_download.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py index b5580179..0c05845a 100644 --- a/google_images_download/google_images_download.py +++ b/google_images_download/google_images_download.py @@ -33,7 +33,7 @@ object_check = vars(config_file_check[0]) if object_check['config_file'] != '': - args_list = ["keywords","keywords_from_file","suffix_keywords","limit","format","url","single_image","output_directory","delay","color","color_type","usage_rights","size","type","time","time_range","aspect_ratio","similar_images","specific_site","print_urls","print_size","metadata","extract_metadata","socket_timeout","thumbnail","language","prefix","proxy"] + args_list = ["keywords","keywords_from_file","suffix_keywords","prefix_keywords","limit","format","url","single_image","output_directory","delay","color","color_type","usage_rights","size","type","time","time_range","aspect_ratio","similar_images","specific_site","print_urls","print_size","metadata","extract_metadata","socket_timeout","thumbnail","language","prefix","proxy","related_images"] records = [] json_file = json.load(open(config_file_check[0].config_file)) for record in range(0,len(json_file['Records'])): @@ -81,7 +81,7 @@ parser.add_argument('-e', '--extract_metadata', default=False, help="Dumps all the logs into a text file", action="store_true") parser.add_argument('-st', '--socket_timeout', default=False, help="Connection timeout waiting for the image to download", type=float) parser.add_argument('-th', '--thumbnail', default=False, help="Downloads image thumbnail along with the actual image", action="store_true") - 
parser.add_argument('-la', '--language', default=False, help="Defines the language filter. The search results are authomatically returned in that language", type=str, required=False, + parser.add_argument('-la', '--language', default=False, help="Defines the language filter. The search results are automatically returned in that language", type=str, required=False, choices=['Arabic','Chinese (Simplified)','Chinese (Traditional)','Czech','Danish','Dutch','English','Estonian','Finnish','French','German','Greek','Hebrew','Hungarian','Icelandic','Italian','Japanese','Korean','Latvian','Lithuanian','Norwegian','Portuguese','Polish','Romanian','Russian','Spanish','Swedish','Turkish']) parser.add_argument('-pr', '--prefix', default=False, help="A word that you would want to prefix in front of each image name", type=str, required=False) parser.add_argument('-px', '--proxy', help='specify a proxy address and port', type=str, required=False) From 7d243ed79eabd7a5de5f550dbd81fd2be08c0dcd Mon Sep 17 00:00:00 2001 From: "M.A. Anjum" Date: Wed, 4 Apr 2018 06:22:00 +0500 Subject: [PATCH 31/83] Add prefix to single_image downloads (#60) --- google_images_download/google_images_download.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py index 0c05845a..273b5dff 100644 --- a/google_images_download/google_images_download.py +++ b/google_images_download/google_images_download.py @@ -233,6 +233,8 @@ def format_object(object): #function to download single image def single_image(): url = arguments['single_image'] + prefix = arguments['prefix'] if arguments['prefix'] else '' + try: os.makedirs(main_directory) except OSError as e: @@ -246,15 +248,15 @@ def single_image(): if '?' 
in image_name: image_name = image_name[:image_name.find('?')] if ".jpg" in image_name or ".gif" in image_name or ".png" in image_name or ".bmp" in image_name or ".svg" in image_name or ".webp" in image_name or ".ico" in image_name: - output_file = open(main_directory + "/" + image_name, 'wb') + output_file = open(main_directory + "/" + prefix + image_name, 'wb') else: - output_file = open(main_directory + "/" + image_name + ".jpg", 'wb') + output_file = open(main_directory + "/" + prefix + image_name + ".jpg", 'wb') image_name = image_name + ".jpg" data = response.read() output_file.write(data) response.close() - print("completed ====> " + image_name) + print("completed ====> " + prefix + image_name) return def similar_images(): From 10c9502539749f41dcd0fb1f6137a6433050d003 Mon Sep 17 00:00:00 2001 From: Hardik Vasa Date: Wed, 4 Apr 2018 21:13:06 -0700 Subject: [PATCH 32/83] Update google_images_download.py feature to use this lib from another python file changed the algorithm to not use global variables changed the method name to download() for simplicity use class to host methods so we can create objects when used from another python file error count inside the main function and reports for individual keywords corrected the arguments list changed the color argument short hand to co from c cleaned the code and removed redundant code lines --- .../google_images_download.py | 1384 +++++++++-------- 1 file changed, 696 insertions(+), 688 deletions(-) diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py index 273b5dff..a4477dbd 100644 --- a/google_images_download/google_images_download.py +++ b/google_images_download/google_images_download.py @@ -27,760 +27,768 @@ import re import codecs -config = argparse.ArgumentParser() -config.add_argument('-cf', '--config_file', help='config file name', default='', type=str, required=False) -config_file_check = config.parse_known_args() -object_check = vars(config_file_check[0]) - -if object_check['config_file'] != '': - args_list = ["keywords","keywords_from_file","suffix_keywords","prefix_keywords","limit","format","url","single_image","output_directory","delay","color","color_type","usage_rights","size","type","time","time_range","aspect_ratio","similar_images","specific_site","print_urls","print_size","metadata","extract_metadata","socket_timeout","thumbnail","language","prefix","proxy","related_images"] - records = [] - json_file = json.load(open(config_file_check[0].config_file)) - for record in range(0,len(json_file['Records'])): - arguments = {} - for i in args_list: - arguments[i] = None - for key, value in json_file['Records'][record].items(): - arguments[key] = value +args_list = ["keywords", "keywords_from_file", "prefix_keywords", "suffix_keywords", + "limit", "related_images", "format", "color", "color_type", "usage_rights", "size", + "aspect_ratio", "type", "time", "time_range", "delay", "url", "single_image", + "output_directory", "proxy", "similar_images", "specific_site", "print_urls", "print_size", + "metadata", "extract_metadata", "socket_timeout", "thumbnail", "language", "prefix", "chromedriver"] + + +def user_input(): + config = argparse.ArgumentParser() + config.add_argument('-cf', '--config_file', help='config file name', default='', type=str, required=False) + config_file_check = config.parse_known_args() + object_check = vars(config_file_check[0]) + + if object_check['config_file'] != '': + records = [] + json_file = json.load(open(config_file_check[0].config_file)) + 
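
# NOTE (illustrative sketch): the -cf/--config_file option above expects a
# JSON file with a top-level "Records" list; every record is one set of
# arguments, and any key a record leaves out stays None. An assumed minimal
# example:
#
#     {
#       "Records": [
#         {"keywords": "apple", "limit": 5, "print_urls": true},
#         {"keywords": "oranges", "limit": 10, "format": "png"}
#       ]
#     }
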
for record in range(0,len(json_file['Records'])): + arguments = {} + for i in args_list: + arguments[i] = None + for key, value in json_file['Records'][record].items(): + arguments[key] = value + records.append(arguments) + records_count = len(records) + else: + # Taking command line arguments from users + parser = argparse.ArgumentParser() + parser.add_argument('-k', '--keywords', help='delimited list input', type=str, required=False) + parser.add_argument('-kf', '--keywords_from_file', help='extract list of keywords from a text file', type=str, required=False) + parser.add_argument('-sk', '--suffix_keywords', help='comma separated additional words added after to main keyword', type=str, required=False) + parser.add_argument('-pk', '--prefix_keywords', help='comma separated additional words added before main keyword', type=str, required=False) + parser.add_argument('-l', '--limit', help='delimited list input', type=str, required=False) + parser.add_argument('-f', '--format', help='download images with specific format', type=str, required=False, + choices=['jpg', 'gif', 'png', 'bmp', 'svg', 'webp', 'ico']) + parser.add_argument('-u', '--url', help='search with google image URL', type=str, required=False) + parser.add_argument('-x', '--single_image', help='downloading a single image from URL', type=str, required=False) + parser.add_argument('-o', '--output_directory', help='download images in a specific directory', type=str, required=False) + parser.add_argument('-d', '--delay', help='delay in seconds to wait between downloading two images', type=int, required=False) + parser.add_argument('-co', '--color', help='filter on color', type=str, required=False, + choices=['red', 'orange', 'yellow', 'green', 'teal', 'blue', 'purple', 'pink', 'white', 'gray', 'black', 'brown']) + parser.add_argument('-ct', '--color_type', help='filter on color', type=str, required=False, + choices=['full-color', 'black-and-white', 'transparent']) + parser.add_argument('-r', '--usage_rights', help='usage rights', type=str, required=False, + choices=['labeled-for-reuse-with-modifications','labeled-for-reuse','labeled-for-noncommercial-reuse-with-modification','labeled-for-nocommercial-reuse']) + parser.add_argument('-s', '--size', help='image size', type=str, required=False, + choices=['large','medium','icon','>400*300','>640*480','>800*600','>1024*768','>2MP','>4MP','>6MP','>8MP','>10MP','>12MP','>15MP','>20MP','>40MP','>70MP']) + parser.add_argument('-t', '--type', help='image type', type=str, required=False, + choices=['face','photo','clip-art','line-drawing','animated']) + parser.add_argument('-w', '--time', help='image age', type=str, required=False, + choices=['past-24-hours','past-7-days']) + parser.add_argument('-wr', '--time_range', help='time range for the age of the image. 
should be in the format {"time_min":"MM/DD/YYYY","time_max":"MM/DD/YYYY"}', type=str, required=False) + parser.add_argument('-a', '--aspect_ratio', help='comma separated additional words added to keywords', type=str, required=False, + choices=['tall', 'square', 'wide', 'panoramic']) + parser.add_argument('-si', '--similar_images', help='downloads images very similar to the image URL you provide', type=str, required=False) + parser.add_argument('-ss', '--specific_site', help='downloads images that are indexed from a specific website', type=str, required=False) + parser.add_argument('-p', '--print_urls', default=False, help="Print the URLs of the images", action="store_true") + parser.add_argument('-ps', '--print_size', default=False, help="Print the size of the images on disk", action="store_true") + parser.add_argument('-m', '--metadata', default=False, help="Print the metadata of the image", action="store_true") + parser.add_argument('-e', '--extract_metadata', default=False, help="Dumps all the logs into a text file", action="store_true") + parser.add_argument('-st', '--socket_timeout', default=False, help="Connection timeout waiting for the image to download", type=float) + parser.add_argument('-th', '--thumbnail', default=False, help="Downloads image thumbnail along with the actual image", action="store_true") + parser.add_argument('-la', '--language', default=False, help="Defines the language filter. The search results are authomatically returned in that language", type=str, required=False, + choices=['Arabic','Chinese (Simplified)','Chinese (Traditional)','Czech','Danish','Dutch','English','Estonian','Finnish','French','German','Greek','Hebrew','Hungarian','Icelandic','Italian','Japanese','Korean','Latvian','Lithuanian','Norwegian','Portuguese','Polish','Romanian','Russian','Spanish','Swedish','Turkish']) + parser.add_argument('-pr', '--prefix', default=False, help="A word that you would want to prefix in front of each image name", type=str, required=False) + parser.add_argument('-px', '--proxy', help='specify a proxy address and port', type=str, required=False) + parser.add_argument('-cd', '--chromedriver', help='specify the path to chromedriver executable in your local machine', type=str, required=False) + parser.add_argument('-ri', '--related_images', default=False, help="Downloads images that are similar to the keyword provided", action="store_true") + + args = parser.parse_args() + arguments = vars(args) + records = [] records.append(arguments) - records_count = len(records) -else: - # Taking command line arguments from users - parser = argparse.ArgumentParser() - parser.add_argument('-k', '--keywords', help='delimited list input', type=str, required=False) - parser.add_argument('-kf', '--keywords_from_file', help='extract list of keywords from a text file', type=str, required=False) - parser.add_argument('-sk', '--suffix_keywords', help='comma separated additional words added after to main keyword', type=str, required=False) - parser.add_argument('-pk', '--prefix_keywords', help='comma separated additional words added before main keyword', type=str, required=False) - parser.add_argument('-l', '--limit', help='delimited list input', type=str, required=False) - parser.add_argument('-f', '--format', help='download images with specific format', type=str, required=False, - choices=['jpg', 'gif', 'png', 'bmp', 'svg', 'webp', 'ico']) - parser.add_argument('-u', '--url', help='search with google image URL', type=str, required=False) - parser.add_argument('-x', '--single_image', 
help='downloading a single image from URL', type=str, required=False) - parser.add_argument('-o', '--output_directory', help='download images in a specific directory', type=str, required=False) - parser.add_argument('-d', '--delay', help='delay in seconds to wait between downloading two images', type=str, required=False) - parser.add_argument('-c', '--color', help='filter on color', type=str, required=False, - choices=['red', 'orange', 'yellow', 'green', 'teal', 'blue', 'purple', 'pink', 'white', 'gray', 'black', 'brown']) - parser.add_argument('-ct', '--color_type', help='filter on color', type=str, required=False, - choices=['full-color', 'black-and-white', 'transparent']) - parser.add_argument('-r', '--usage_rights', help='usage rights', type=str, required=False, - choices=['labeled-for-reuse-with-modifications','labeled-for-reuse','labeled-for-noncommercial-reuse-with-modification','labeled-for-nocommercial-reuse']) - parser.add_argument('-s', '--size', help='image size', type=str, required=False, - choices=['large','medium','icon','>400*300','>640*480','>800*600','>1024*768','>2MP','>4MP','>6MP','>8MP','>10MP','>12MP','>15MP','>20MP','>40MP','>70MP']) - parser.add_argument('-t', '--type', help='image type', type=str, required=False, - choices=['face','photo','clip-art','line-drawing','animated']) - parser.add_argument('-w', '--time', help='image age', type=str, required=False, - choices=['past-24-hours','past-7-days']) - parser.add_argument('-wr', '--time_range', help='time range for the age of the image. should be in the format {"time_min":"MM/DD/YYYY","time_max":"MM/DD/YYYY"}', type=str, required=False) - parser.add_argument('-a', '--aspect_ratio', help='comma separated additional words added to keywords', type=str, required=False, - choices=['tall', 'square', 'wide', 'panoramic']) - parser.add_argument('-si', '--similar_images', help='downloads images very similar to the image URL you provide', type=str, required=False) - parser.add_argument('-ss', '--specific_site', help='downloads images that are indexed from a specific website', type=str, required=False) - parser.add_argument('-p', '--print_urls', default=False, help="Print the URLs of the images", action="store_true") - parser.add_argument('-ps', '--print_size', default=False, help="Print the size of the images on disk", action="store_true") - parser.add_argument('-m', '--metadata', default=False, help="Print the metadata of the image", action="store_true") - parser.add_argument('-e', '--extract_metadata', default=False, help="Dumps all the logs into a text file", action="store_true") - parser.add_argument('-st', '--socket_timeout', default=False, help="Connection timeout waiting for the image to download", type=float) - parser.add_argument('-th', '--thumbnail', default=False, help="Downloads image thumbnail along with the actual image", action="store_true") - parser.add_argument('-la', '--language', default=False, help="Defines the language filter. 
The search results are automatically returned in that language", type=str, required=False, - choices=['Arabic','Chinese (Simplified)','Chinese (Traditional)','Czech','Danish','Dutch','English','Estonian','Finnish','French','German','Greek','Hebrew','Hungarian','Icelandic','Italian','Japanese','Korean','Latvian','Lithuanian','Norwegian','Portuguese','Polish','Romanian','Russian','Spanish','Swedish','Turkish']) - parser.add_argument('-pr', '--prefix', default=False, help="A word that you would want to prefix in front of each image name", type=str, required=False) - parser.add_argument('-px', '--proxy', help='specify a proxy address and port', type=str, required=False) - parser.add_argument('-cd', '--chromedriver', help='specify the path to chromedriver executable in your local machine', type=str, required=False) - parser.add_argument('-ri', '--related_images', default=False, help="Downloads images that are similar to the keyword provided", action="store_true") - - args = parser.parse_args() - arguments = vars(args) - records = [] - records.append(arguments) - records_count = len(records) - -# Downloading entire Web Document (Raw Page Content) -def download_page(url): - version = (3, 0) - cur_version = sys.version_info - if cur_version >= version: # If the Current Version of Python is 3.0 or above - try: - headers = {} - headers['User-Agent'] = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36" - req = urllib.request.Request(url, headers=headers) - resp = urllib.request.urlopen(req) - respData = str(resp.read()) - return respData - except Exception as e: - print(str(e)) - else: # If the Current Version of Python is 2.x - try: - headers = {} - headers['User-Agent'] = "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17" - req = urllib2.Request(url, headers=headers) - try: - response = urllib2.urlopen(req) - except URLError: # Handling SSL certificate failed - context = ssl._create_unverified_context() - response = urlopen(req, context=context) - page = response.read() - return page - except: - return "Page Not found" - -# Download Page for more than 100 images -def download_extended_page(url): - from selenium import webdriver - from selenium.webdriver.common.keys import Keys - if sys.version_info[0] < 3: - reload(sys) - sys.setdefaultencoding('utf8') - options = webdriver.ChromeOptions() - options.add_argument('--no-sandbox') - options.add_argument("--headless") - - try: - browser = webdriver.Chrome(arguments['chromedriver'], chrome_options=options) - except: - print("Looks like we cannot locate the path the 'chromedriver' (use the '--chromedriver' " - "argument to specify the path to the executable.) or google chrome browser is not " - "installed on your machine") - sys.exit() - browser.set_window_size(1024, 768) - - # Open the link - browser.get(url) - time.sleep(1) - print("Getting you a lot of images. 
This may take a few moments...") - - element = browser.find_element_by_tag_name("body") - # Scroll down - for i in range(30): - element.send_keys(Keys.PAGE_DOWN) - time.sleep(0.3) - - try: - browser.find_element_by_id("smb").click() - for i in range(50): - element.send_keys(Keys.PAGE_DOWN) - time.sleep(0.3) # bot id protection - except: - for i in range(10): - element.send_keys(Keys.PAGE_DOWN) - time.sleep(0.3) # bot id protection + return records - print("Reached end of Page.") - time.sleep(0.5) - source = browser.page_source #page source - #close the browser - browser.close() +class googleimagesdownload: + def __init__(self): + pass - return source + # Downloading entire Web Document (Raw Page Content) + def download_page(self,url): + version = (3, 0) + cur_version = sys.version_info + if cur_version >= version: # If the Current Version of Python is 3.0 or above + try: + headers = {} + headers['User-Agent'] = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36" + req = urllib.request.Request(url, headers=headers) + resp = urllib.request.urlopen(req) + respData = str(resp.read()) + return respData + except Exception as e: + print(str(e)) + else: # If the Current Version of Python is 2.x + try: + headers = {} + headers['User-Agent'] = "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17" + req = urllib2.Request(url, headers=headers) + try: + response = urllib2.urlopen(req) + except URLError: # Handling SSL certificate failed + context = ssl._create_unverified_context() + response = urlopen(req, context=context) + page = response.read() + return page + except: + return "Page Not found" -#Correcting the escape characters for python2 -def replace_with_byte(match): - return chr(int(match.group(0)[1:], 8)) -def repair(brokenjson): - invalid_escape = re.compile(r'\\[0-7]{1,3}') # up to 3 digits for byte values up to FF - return invalid_escape.sub(replace_with_byte, brokenjson) + # Download Page for more than 100 images + def download_extended_page(self,url,chromedriver): + from selenium import webdriver + from selenium.webdriver.common.keys import Keys + if sys.version_info[0] < 3: + reload(sys) + sys.setdefaultencoding('utf8') + options = webdriver.ChromeOptions() + options.add_argument('--no-sandbox') + options.add_argument("--headless") -# Finding 'Next Image' from the given raw page -def get_next_tab(s): - start_line = s.find('class="ZO5Spb"') - if start_line == -1: # If no links are found then give an error! 
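
# NOTE (illustrative sketch): these helpers are now methods of the
# googleimagesdownload class, so the scraper can be driven from another
# Python file as well as from the command line. A rough usage sketch, where
# results_page_url is a placeholder for an already-built Google Images search
# URL (download_extended_page(), used when the limit goes above 100, would
# additionally need the chromedriver path):
#
#     from google_images_download import google_images_download
#     gid = google_images_download.googleimagesdownload()
#     raw_html = gid.download_page(results_page_url)
#     tabs = gid.get_all_tabs(raw_html)   # {related keyword: google.com URL, ...}
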
- end_quote = 0 - link = "no_tabs" - return link,'',end_quote - else: - start_line = s.find('class="ZO5Spb"') - start_content = s.find('href="', start_line + 1) - end_content = s.find('">', start_content + 1) - url_item = "https://www.google.com" + str(s[start_content+6:end_content]) - url_item = url_item.replace('&', '&') - - start_line_2 = s.find('class="ZO5Spb"') - start_content_2 = s.find(':', start_line_2 + 1) - end_content_2 = s.find('"', start_content_2 + 1) - url_item_name = str(s[start_content_2 + 1:end_content_2]) - - #print(url_item,url_item_name) - return url_item,url_item_name,end_content - - -# Getting all links with the help of '_images_get_next_image' -def get_all_tabs(page): - tabs = {} - while True: - item,item_name,end_content = get_next_tab(page) - if item == "no_tabs": - break - else: - tabs[item_name] = item # Append all the links in the list named 'Links' - time.sleep(0.1) # Timer could be used to slow down the request for image downloads - page = page[end_content:] - return tabs - -#Format the object in readable format -def format_object(object): - formatted_object = {} - formatted_object['image_format'] = object['ity'] - formatted_object['image_height'] = object['oh'] - formatted_object['image_width'] = object['ow'] - formatted_object['image_link'] = object['ou'] - formatted_object['image_description'] = object['pt'] - formatted_object['image_host'] = object['rh'] - formatted_object['image_source'] = object['ru'] - formatted_object['image_thumbnail_url'] = object['tu'] - return formatted_object - -#function to download single image -def single_image(): - url = arguments['single_image'] - prefix = arguments['prefix'] if arguments['prefix'] else '' - - try: - os.makedirs(main_directory) - except OSError as e: - if e.errno != 17: - raise - pass - req = Request(url, headers={ - "User-Agent": "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17"}) - response = urlopen(req, None, 10) - image_name = str(url[(url.rfind('/')) + 1:]) - if '?' 
in image_name: - image_name = image_name[:image_name.find('?')] - if ".jpg" in image_name or ".gif" in image_name or ".png" in image_name or ".bmp" in image_name or ".svg" in image_name or ".webp" in image_name or ".ico" in image_name: - output_file = open(main_directory + "/" + prefix + image_name, 'wb') - else: - output_file = open(main_directory + "/" + prefix + image_name + ".jpg", 'wb') - image_name = image_name + ".jpg" - - data = response.read() - output_file.write(data) - response.close() - print("completed ====> " + prefix + image_name) - return - -def similar_images(): - version = (3, 0) - cur_version = sys.version_info - if cur_version >= version: # If the Current Version of Python is 3.0 or above try: - searchUrl = 'https://www.google.com/searchbyimage?site=search&sa=X&image_url=' + arguments['similar_images'] - headers = {} - headers['User-Agent'] = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36" - - req1 = urllib.request.Request(searchUrl, headers=headers) - resp1 = urllib.request.urlopen(req1) - content = str(resp1.read()) - l1 = content.find('AMhZZ') - l2 = content.find('&', l1) - urll = content[l1:l2] - - newurl = "https://www.google.com/search?tbs=sbi:" + urll + "&site=search&sa=X" - req2 = urllib.request.Request(newurl, headers=headers) - resp2 = urllib.request.urlopen(req2) - # print(resp2.read()) - l3 = content.find('/search?sa=X&q=') - l4 = content.find(';', l3 + 19) - urll2 = content[l3 + 19:l4] - return urll2 + browser = webdriver.Chrome(chromedriver, chrome_options=options) except: - return "Cloud not connect to Google Images endpoint" - else: # If the Current Version of Python is 2.x + print("Looks like we cannot locate the path the 'chromedriver' (use the '--chromedriver' " + "argument to specify the path to the executable.) or google chrome browser is not " + "installed on your machine") + sys.exit() + browser.set_window_size(1024, 768) + + # Open the link + browser.get(url) + time.sleep(1) + print("Getting you a lot of images. 
This may take a few moments...") + + element = browser.find_element_by_tag_name("body") + # Scroll down + for i in range(30): + element.send_keys(Keys.PAGE_DOWN) + time.sleep(0.3) + try: - searchUrl = 'https://www.google.com/searchbyimage?site=search&sa=X&image_url=' + arguments['similar_images'] - headers = {} - headers['User-Agent'] = "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17" - - req1 = urllib2.Request(searchUrl, headers=headers) - resp1 = urllib2.urlopen(req1) - content = str(resp1.read()) - l1 = content.find('AMhZZ') - l2 = content.find('&', l1) - urll = content[l1:l2] - - newurl = "https://www.google.com/search?tbs=sbi:" + urll + "&site=search&sa=X" - #print newurl - req2 = urllib2.Request(newurl, headers=headers) - resp2 = urllib2.urlopen(req2) - # print(resp2.read()) - l3 = content.find('/search?sa=X&q=') - l4 = content.find(';', l3 + 19) - urll2 = content[l3 + 19:l4] - return(urll2) + browser.find_element_by_id("smb").click() + for i in range(50): + element.send_keys(Keys.PAGE_DOWN) + time.sleep(0.3) # bot id protection except: - return "Cloud not connect to Google Images endpoint" - -#Building URL parameters -def build_url_parameters(): - if arguments['language']: - lang = "&lr=" - lang_param = {"Arabic":"lang_ar","Chinese (Simplified)":"lang_zh-CN","Chinese (Traditional)":"lang_zh-TW","Czech":"lang_cs","Danish":"lang_da","Dutch":"lang_nl","English":"lang_en","Estonian":"lang_et","Finnish":"lang_fi","French":"lang_fr","German":"lang_de","Greek":"lang_el","Hebrew":"lang_iw ","Hungarian":"lang_hu","Icelandic":"lang_is","Italian":"lang_it","Japanese":"lang_ja","Korean":"lang_ko","Latvian":"lang_lv","Lithuanian":"lang_lt","Norwegian":"lang_no","Portuguese":"lang_pt","Polish":"lang_pl","Romanian":"lang_ro","Russian":"lang_ru","Spanish":"lang_es","Swedish":"lang_sv","Turkish":"lang_tr"} - lang_url = lang+lang_param[arguments['language']] - else: - lang_url = '' + for i in range(10): + element.send_keys(Keys.PAGE_DOWN) + time.sleep(0.3) # bot id protection - if arguments['time_range']: - json_acceptable_string = arguments['time_range'].replace("'", "\"") - d = json.loads(json_acceptable_string) - time_range = '&cdr:1,cd_min:' + d['time_min'] + ',cd_max:' + d['time_min'] - else: - time_range = '' - - built_url = "&tbs=" - counter = 0 - params = {'color':[arguments['color'],{'red':'ic:specific,isc:red', 'orange':'ic:specific,isc:orange', 'yellow':'ic:specific,isc:yellow', 'green':'ic:specific,isc:green', 'teal':'ic:specific,isc:teel', 'blue':'ic:specific,isc:blue', 'purple':'ic:specific,isc:purple', 'pink':'ic:specific,isc:pink', 'white':'ic:specific,isc:white', 'gray':'ic:specific,isc:gray', 'black':'ic:specific,isc:black', 'brown':'ic:specific,isc:brown'}], - 'color_type':[arguments['color_type'],{'full-color':'ic:color', 'black-and-white':'ic:gray','transparent':'ic:trans'}], - 'usage_rights':[arguments['usage_rights'],{'labeled-for-reuse-with-modifications':'sur:fmc','labeled-for-reuse':'sur:fc','labeled-for-noncommercial-reuse-with-modification':'sur:fm','labeled-for-nocommercial-reuse':'sur:f'}], - 
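
# NOTE (illustrative sketch): each entry in this dict maps a filter value to
# the token it contributes to the "&tbs=" query parameter, and the selected
# tokens are then joined with commas. With assumed arguments color='red' and
# format='jpg', the resulting parameter string would be roughly:
#
#     &tbs=ic:specific,isc:red,ift:jpg
#
# (token order depends on the dict iteration order).
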
'size':[arguments['size'],{'large':'isz:l','medium':'isz:m','icon':'isz:i','>400*300':'isz:lt,islt:qsvga','>640*480':'isz:lt,islt:vga','>800*600':'isz:lt,islt:svga','>1024*768':'visz:lt,islt:xga','>2MP':'isz:lt,islt:2mp','>4MP':'isz:lt,islt:4mp','>6MP':'isz:lt,islt:6mp','>8MP':'isz:lt,islt:8mp','>10MP':'isz:lt,islt:10mp','>12MP':'isz:lt,islt:12mp','>15MP':'isz:lt,islt:15mp','>20MP':'isz:lt,islt:20mp','>40MP':'isz:lt,islt:40mp','>70MP':'isz:lt,islt:70mp'}], - 'type':[arguments['type'],{'face':'itp:face','photo':'itp:photo','clip-art':'itp:clip-art','line-drawing':'itp:lineart','animated':'itp:animated'}], - 'time':[arguments['time'],{'past-24-hours':'qdr:d','past-7-days':'qdr:w'}], - 'aspect_ratio':[arguments['aspect_ratio'],{'tall':'iar:t','square':'iar:s','wide':'iar:w','panoramic':'iar:xw'}], - 'format':[arguments['format'],{'jpg':'ift:jpg','gif':'ift:gif','png':'ift:png','bmp':'ift:bmp','svg':'ift:svg','webp':'webp','ico':'ift:ico'}]} - for key, value in params.items(): - if value[0] is not None: - ext_param = value[1][value[0]] - # counter will tell if it is first param added or not - if counter == 0: - # add it to the built url - built_url = built_url + ext_param - counter += 1 + print("Reached end of Page.") + time.sleep(0.5) + + source = browser.page_source #page source + #close the browser + browser.close() + + return source + + + #Correcting the escape characters for python2 + def replace_with_byte(self,match): + return chr(int(match.group(0)[1:], 8)) + + def repair(self,brokenjson): + invalid_escape = re.compile(r'\\[0-7]{1,3}') # up to 3 digits for byte values up to FF + return invalid_escape.sub(self.replace_with_byte, brokenjson) + + + # Finding 'Next Image' from the given raw page + def get_next_tab(self,s): + start_line = s.find('class="ZO5Spb"') + if start_line == -1: # If no links are found then give an error! 
+ end_quote = 0 + link = "no_tabs" + return link,'',end_quote + else: + start_line = s.find('class="ZO5Spb"') + start_content = s.find('href="', start_line + 1) + end_content = s.find('">', start_content + 1) + url_item = "https://www.google.com" + str(s[start_content+6:end_content]) + url_item = url_item.replace('&', '&') + + start_line_2 = s.find('class="ZO5Spb"') + start_content_2 = s.find(':', start_line_2 + 1) + end_content_2 = s.find('"', start_content_2 + 1) + url_item_name = str(s[start_content_2 + 1:end_content_2]) + + #print(url_item,url_item_name) + return url_item,url_item_name,end_content + + + # Getting all links with the help of '_images_get_next_image' + def get_all_tabs(self,page): + tabs = {} + while True: + item,item_name,end_content = self.get_next_tab(page) + if item == "no_tabs": + break else: - built_url = built_url + ',' + ext_param - counter += 1 - built_url = lang_url+built_url+time_range - return built_url - -#building main search URL -def build_search_url(search_term,params): - # check the args and choose the URL - if arguments['url']: - url = arguments['url'] - elif arguments['similar_images']: - keywordem = similar_images() - url = 'https://www.google.com/search?q=' + keywordem + '&espv=2&biw=1366&bih=667&site=webhp&source=lnms&tbm=isch&sa=X&ei=XosDVaCXD8TasATItgE&ved=0CAcQ_AUoAg' - elif arguments['specific_site']: - url = 'https://www.google.com/search?q=' + quote( - search_term) + '&as_sitesearch=' + arguments['specific_site'] + '&espv=2&biw=1366&bih=667&site=webhp&source=lnms&tbm=isch' + params + '&sa=X&ei=XosDVaCXD8TasATItgE&ved=0CAcQ_AUoAg' - else: - url = 'https://www.google.com/search?q=' + quote( - search_term) + '&espv=2&biw=1366&bih=667&site=webhp&source=lnms&tbm=isch' + params + '&sa=X&ei=XosDVaCXD8TasATItgE&ved=0CAcQ_AUoAg' - #print(url) - return url - -#measures the file size -def file_size(file_path): - if os.path.isfile(file_path): - file_info = os.stat(file_path) - size = file_info.st_size - for x in ['bytes', 'KB', 'MB', 'GB', 'TB']: - if size < 1024.0: - return "%3.1f %s" % (size, x) - size /= 1024.0 - return size - -# make directories -def create_directories(main_directory, dir_name): - dir_name_thumbnail = dir_name + " - thumbnail" - # make a search keyword directory - try: - if not os.path.exists(main_directory): + tabs[item_name] = item # Append all the links in the list named 'Links' + time.sleep(0.1) # Timer could be used to slow down the request for image downloads + page = page[end_content:] + return tabs + + + #Format the object in readable format + def format_object(self,object): + formatted_object = {} + formatted_object['image_format'] = object['ity'] + formatted_object['image_height'] = object['oh'] + formatted_object['image_width'] = object['ow'] + formatted_object['image_link'] = object['ou'] + formatted_object['image_description'] = object['pt'] + formatted_object['image_host'] = object['rh'] + formatted_object['image_source'] = object['ru'] + formatted_object['image_thumbnail_url'] = object['tu'] + return formatted_object + + + #function to download single image + def single_image(self,image_url): + main_directory = "downloads" + url = image_url + try: os.makedirs(main_directory) - time.sleep(0.2) - path = str(dir_name) - sub_directory = os.path.join(main_directory, path) - if not os.path.exists(sub_directory): - os.makedirs(sub_directory) - if arguments['thumbnail']: - sub_directory_thumbnail = os.path.join(main_directory, dir_name_thumbnail) - if not os.path.exists(sub_directory_thumbnail): - 
os.makedirs(sub_directory_thumbnail) + except OSError as e: + if e.errno != 17: + raise + pass + req = Request(url, headers={ + "User-Agent": "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17"}) + response = urlopen(req, None, 10) + image_name = str(url[(url.rfind('/')) + 1:]) + if '?' in image_name: + image_name = image_name[:image_name.find('?')] + if ".jpg" in image_name or ".gif" in image_name or ".png" in image_name or ".bmp" in image_name or ".svg" in image_name or ".webp" in image_name or ".ico" in image_name: + output_file = open(main_directory + "/" + image_name, 'wb') else: - path = str(dir_name) - sub_directory = os.path.join(main_directory, path) - if not os.path.exists(sub_directory): - os.makedirs(sub_directory) - if arguments['thumbnail']: - sub_directory_thumbnail = os.path.join(main_directory, dir_name_thumbnail) - if not os.path.exists(sub_directory_thumbnail): - os.makedirs(sub_directory_thumbnail) - except OSError as e: - if e.errno != 17: - raise - # time.sleep might help here - pass - return + output_file = open(main_directory + "/" + image_name + ".jpg", 'wb') + image_name = image_name + ".jpg" + data = response.read() + output_file.write(data) + response.close() + print("completed ====> " + image_name) + return -# Download Images -def download_image_thumbnail(image_url,main_directory,dir_name,return_image_name): - if arguments['print_urls']: - print("Image URL: " + image_url) - try: - req = Request(image_url, headers={ - "User-Agent": "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17"}) + def similar_images(self,similar_images): + version = (3, 0) + cur_version = sys.version_info + if cur_version >= version: # If the Current Version of Python is 3.0 or above + try: + searchUrl = 'https://www.google.com/searchbyimage?site=search&sa=X&image_url=' + similar_images + headers = {} + headers['User-Agent'] = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36" + + req1 = urllib.request.Request(searchUrl, headers=headers) + resp1 = urllib.request.urlopen(req1) + content = str(resp1.read()) + l1 = content.find('AMhZZ') + l2 = content.find('&', l1) + urll = content[l1:l2] + + newurl = "https://www.google.com/search?tbs=sbi:" + urll + "&site=search&sa=X" + req2 = urllib.request.Request(newurl, headers=headers) + resp2 = urllib.request.urlopen(req2) + # print(resp2.read()) + l3 = content.find('/search?sa=X&q=') + l4 = content.find(';', l3 + 19) + urll2 = content[l3 + 19:l4] + return urll2 + except: + return "Cloud not connect to Google Images endpoint" + else: # If the Current Version of Python is 2.x + try: + searchUrl = 'https://www.google.com/searchbyimage?site=search&sa=X&image_url=' + similar_images + headers = {} + headers['User-Agent'] = "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17" + + req1 = urllib2.Request(searchUrl, headers=headers) + resp1 = urllib2.urlopen(req1) + content = str(resp1.read()) + l1 = content.find('AMhZZ') + l2 = content.find('&', l1) + urll = content[l1:l2] + + newurl = "https://www.google.com/search?tbs=sbi:" + urll + "&site=search&sa=X" + #print newurl + req2 = urllib2.Request(newurl, headers=headers) + resp2 = urllib2.urlopen(req2) + # print(resp2.read()) + l3 = content.find('/search?sa=X&q=') + l4 = content.find(';', l3 + 19) + urll2 = content[l3 + 19:l4] + return(urll2) + except: + return "Cloud not connect to Google Images 
endpoint" + + #Building URL parameters + def build_url_parameters(self,arguments): + if arguments['language']: + lang = "&lr=" + lang_param = {"Arabic":"lang_ar","Chinese (Simplified)":"lang_zh-CN","Chinese (Traditional)":"lang_zh-TW","Czech":"lang_cs","Danish":"lang_da","Dutch":"lang_nl","English":"lang_en","Estonian":"lang_et","Finnish":"lang_fi","French":"lang_fr","German":"lang_de","Greek":"lang_el","Hebrew":"lang_iw ","Hungarian":"lang_hu","Icelandic":"lang_is","Italian":"lang_it","Japanese":"lang_ja","Korean":"lang_ko","Latvian":"lang_lv","Lithuanian":"lang_lt","Norwegian":"lang_no","Portuguese":"lang_pt","Polish":"lang_pl","Romanian":"lang_ro","Russian":"lang_ru","Spanish":"lang_es","Swedish":"lang_sv","Turkish":"lang_tr"} + lang_url = lang+lang_param[arguments['language']] + else: + lang_url = '' + + if arguments['time_range']: + json_acceptable_string = arguments['time_range'].replace("'", "\"") + d = json.loads(json_acceptable_string) + time_range = '&cdr:1,cd_min:' + d['time_min'] + ',cd_max:' + d['time_min'] + else: + time_range = '' + + built_url = "&tbs=" + counter = 0 + params = {'color':[arguments['color'],{'red':'ic:specific,isc:red', 'orange':'ic:specific,isc:orange', 'yellow':'ic:specific,isc:yellow', 'green':'ic:specific,isc:green', 'teal':'ic:specific,isc:teel', 'blue':'ic:specific,isc:blue', 'purple':'ic:specific,isc:purple', 'pink':'ic:specific,isc:pink', 'white':'ic:specific,isc:white', 'gray':'ic:specific,isc:gray', 'black':'ic:specific,isc:black', 'brown':'ic:specific,isc:brown'}], + 'color_type':[arguments['color_type'],{'full-color':'ic:color', 'black-and-white':'ic:gray','transparent':'ic:trans'}], + 'usage_rights':[arguments['usage_rights'],{'labeled-for-reuse-with-modifications':'sur:fmc','labeled-for-reuse':'sur:fc','labeled-for-noncommercial-reuse-with-modification':'sur:fm','labeled-for-nocommercial-reuse':'sur:f'}], + 'size':[arguments['size'],{'large':'isz:l','medium':'isz:m','icon':'isz:i','>400*300':'isz:lt,islt:qsvga','>640*480':'isz:lt,islt:vga','>800*600':'isz:lt,islt:svga','>1024*768':'visz:lt,islt:xga','>2MP':'isz:lt,islt:2mp','>4MP':'isz:lt,islt:4mp','>6MP':'isz:lt,islt:6mp','>8MP':'isz:lt,islt:8mp','>10MP':'isz:lt,islt:10mp','>12MP':'isz:lt,islt:12mp','>15MP':'isz:lt,islt:15mp','>20MP':'isz:lt,islt:20mp','>40MP':'isz:lt,islt:40mp','>70MP':'isz:lt,islt:70mp'}], + 'type':[arguments['type'],{'face':'itp:face','photo':'itp:photo','clip-art':'itp:clip-art','line-drawing':'itp:lineart','animated':'itp:animated'}], + 'time':[arguments['time'],{'past-24-hours':'qdr:d','past-7-days':'qdr:w'}], + 'aspect_ratio':[arguments['aspect_ratio'],{'tall':'iar:t','square':'iar:s','wide':'iar:w','panoramic':'iar:xw'}], + 'format':[arguments['format'],{'jpg':'ift:jpg','gif':'ift:gif','png':'ift:png','bmp':'ift:bmp','svg':'ift:svg','webp':'webp','ico':'ift:ico'}]} + for key, value in params.items(): + if value[0] is not None: + ext_param = value[1][value[0]] + # counter will tell if it is first param added or not + if counter == 0: + # add it to the built url + built_url = built_url + ext_param + counter += 1 + else: + built_url = built_url + ',' + ext_param + counter += 1 + built_url = lang_url+built_url+time_range + return built_url + + + #building main search URL + def build_search_url(self,search_term,params,url,similar_images,specific_site): + # check the args and choose the URL + if url: + url = url + elif similar_images: + print(similar_images) + keywordem = self.similar_images(similar_images) + url = 'https://www.google.com/search?q=' + keywordem + 
'&espv=2&biw=1366&bih=667&site=webhp&source=lnms&tbm=isch&sa=X&ei=XosDVaCXD8TasATItgE&ved=0CAcQ_AUoAg' + elif specific_site: + url = 'https://www.google.com/search?q=' + quote( + search_term) + '&as_sitesearch=' + specific_site + '&espv=2&biw=1366&bih=667&site=webhp&source=lnms&tbm=isch' + params + '&sa=X&ei=XosDVaCXD8TasATItgE&ved=0CAcQ_AUoAg' + else: + url = 'https://www.google.com/search?q=' + quote( + search_term) + '&espv=2&biw=1366&bih=667&site=webhp&source=lnms&tbm=isch' + params + '&sa=X&ei=XosDVaCXD8TasATItgE&ved=0CAcQ_AUoAg' + #print(url) + return url + + + #measures the file size + def file_size(self,file_path): + if os.path.isfile(file_path): + file_info = os.stat(file_path) + size = file_info.st_size + for x in ['bytes', 'KB', 'MB', 'GB', 'TB']: + if size < 1024.0: + return "%3.1f %s" % (size, x) + size /= 1024.0 + return size + + #keywords from file + def keywords_from_file(self,file_name): + search_keyword = [] + with codecs.open(file_name, 'r', encoding='utf-8-sig') as f: + if '.csv' in file_name: + for line in f: + if line in ['\n', '\r\n']: + pass + else: + search_keyword.append(line.replace('\n', '').replace('\r', '')) + elif '.txt' in file_name: + for line in f: + if line in ['\n', '\r\n']: + pass + else: + search_keyword.append(line.replace('\n', '').replace('\r', '')) + else: + print("Invalid file type: Valid file types are either .txt or .csv \n" + "exiting...") + sys.exit() + return search_keyword + + # make directories + def create_directories(self,main_directory, dir_name,thumbnail): + dir_name_thumbnail = dir_name + " - thumbnail" + # make a search keyword directory try: - # timeout time to download an image - if arguments['socket_timeout']: - timeout = float(arguments['socket_timeout']) + if not os.path.exists(main_directory): + os.makedirs(main_directory) + time.sleep(0.2) + path = str(dir_name) + sub_directory = os.path.join(main_directory, path) + if not os.path.exists(sub_directory): + os.makedirs(sub_directory) + if thumbnail: + sub_directory_thumbnail = os.path.join(main_directory, dir_name_thumbnail) + if not os.path.exists(sub_directory_thumbnail): + os.makedirs(sub_directory_thumbnail) else: - timeout = 10 - response = urlopen(req, None, timeout) + path = str(dir_name) + sub_directory = os.path.join(main_directory, path) + if not os.path.exists(sub_directory): + os.makedirs(sub_directory) + if thumbnail: + sub_directory_thumbnail = os.path.join(main_directory, dir_name_thumbnail) + if not os.path.exists(sub_directory_thumbnail): + os.makedirs(sub_directory_thumbnail) + except OSError as e: + if e.errno != 17: + raise + # time.sleep might help here + pass + return + + + # Download Images + def download_image_thumbnail(self,image_url,main_directory,dir_name,return_image_name,print_urls,socket_timeout,print_size): + if print_urls: + print("Image URL: " + image_url) + try: + req = Request(image_url, headers={ + "User-Agent": "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17"}) + try: + # timeout time to download an image + if socket_timeout: + timeout = float(socket_timeout) + else: + timeout = 10 + response = urlopen(req, None, timeout) - path = main_directory + "/" + dir_name + " - thumbnail" + "/" + return_image_name - output_file = open(path, 'wb') - data = response.read() - output_file.write(data) - response.close() + path = main_directory + "/" + dir_name + " - thumbnail" + "/" + return_image_name + output_file = open(path, 'wb') + data = response.read() + output_file.write(data) + response.close() 
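
# NOTE (illustrative sketch): the thumbnail is written into the parallel
# "<keyword> - thumbnail" directory created by create_directories(), reusing
# the numbered file name returned by download_image(), so an assumed run for
# the keyword "cats" could end up with:
#
#     downloads/cats/1. cat.jpg
#     downloads/cats - thumbnail/1. cat.jpg
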
- download_status = 'success' - download_message = "Completed Image Thumbnail ====> " + return_image_name + download_status = 'success' + download_message = "Completed Image Thumbnail ====> " + return_image_name - # image size parameter - if arguments['print_size']: - print("Image Size: " + str(file_size(path))) + # image size parameter + if print_size: + print("Image Size: " + str(self.file_size(path))) - except UnicodeEncodeError as e: - download_status = 'fail' - download_message = "UnicodeEncodeError on an image...trying next one..." + " Error: " + str(e) + except UnicodeEncodeError as e: + download_status = 'fail' + download_message = "UnicodeEncodeError on an image...trying next one..." + " Error: " + str(e) - except HTTPError as e: # If there is any HTTPError - download_status = 'fail' - download_message = "HTTPError on an image...trying next one..." + " Error: " + str(e) + except HTTPError as e: # If there is any HTTPError + download_status = 'fail' + download_message = "HTTPError on an image...trying next one..." + " Error: " + str(e) - except URLError as e: - download_status = 'fail' - download_message = "URLError on an image...trying next one..." + " Error: " + str(e) + except URLError as e: + download_status = 'fail' + download_message = "URLError on an image...trying next one..." + " Error: " + str(e) - except ssl.CertificateError as e: - download_status = 'fail' - download_message = "CertificateError on an image...trying next one..." + " Error: " + str(e) + except ssl.CertificateError as e: + download_status = 'fail' + download_message = "CertificateError on an image...trying next one..." + " Error: " + str(e) - except IOError as e: # If there is any IOError - download_status = 'fail' - download_message = "IOError on an image...trying next one..." + " Error: " + str(e) - return download_status, download_message + except IOError as e: # If there is any IOError + download_status = 'fail' + download_message = "IOError on an image...trying next one..." + " Error: " + str(e) + return download_status, download_message -# Download Images -def download_image(image_url,image_format,main_directory,dir_name,count): - if arguments['print_urls']: - print("Image URL: " + image_url) - try: - req = Request(image_url, headers={ - "User-Agent": "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17"}) + # Download Images + def download_image(self,image_url,image_format,main_directory,dir_name,count,print_urls,socket_timeout,prefix,print_size): + if print_urls: + print("Image URL: " + image_url) try: - # timeout time to download an image - if arguments['socket_timeout']: - timeout = float(arguments['socket_timeout']) - else: - timeout = 10 - response = urlopen(req, None, timeout) - - # keep everything after the last '/' - image_name = str(image_url[(image_url.rfind('/')) + 1:]) - image_name = image_name.lower() - # if no extension then add it - # remove everything after the image name - if image_format == "": - image_name = image_name + "." 
+ "jpg" - elif image_format == "jpeg": - image_name = image_name[:image_name.find(image_format) + 4] - else: - image_name = image_name[:image_name.find(image_format) + 3] + req = Request(image_url, headers={ + "User-Agent": "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17"}) + try: + # timeout time to download an image + if socket_timeout: + timeout = float(socket_timeout) + else: + timeout = 10 + response = urlopen(req, None, timeout) + + # keep everything after the last '/' + image_name = str(image_url[(image_url.rfind('/')) + 1:]) + image_name = image_name.lower() + # if no extension then add it + # remove everything after the image name + if image_format == "": + image_name = image_name + "." + "jpg" + elif image_format == "jpeg": + image_name = image_name[:image_name.find(image_format) + 4] + else: + image_name = image_name[:image_name.find(image_format) + 3] - # prefix name in image - if arguments['prefix']: - prefix = arguments['prefix'] + " " - else: - prefix = '' + # prefix name in image + if prefix: + prefix = prefix + " " + else: + prefix = '' - path = main_directory + "/" + dir_name + "/" + prefix + str(count) + ". " + image_name - output_file = open(path, 'wb') - data = response.read() - output_file.write(data) - response.close() + path = main_directory + "/" + dir_name + "/" + prefix + str(count) + ". " + image_name + output_file = open(path, 'wb') + data = response.read() + output_file.write(data) + response.close() - #return image name back to calling method to use it for thumbnail downloads - return_image_name = prefix + str(count) + ". " + image_name + #return image name back to calling method to use it for thumbnail downloads + return_image_name = prefix + str(count) + ". " + image_name - download_status = 'success' - download_message = "Completed Image ====> " + prefix + str(count) + ". " + image_name + download_status = 'success' + download_message = "Completed Image ====> " + prefix + str(count) + ". " + image_name - # image size parameter - if arguments['print_size']: - print("Image Size: " + str(file_size(path))) + # image size parameter + if print_size: + print("Image Size: " + str(self.file_size(path))) - except UnicodeEncodeError as e: + except UnicodeEncodeError as e: + download_status = 'fail' + download_message = "UnicodeEncodeError on an image...trying next one..." + " Error: " + str(e) + return_image_name = '' + + except HTTPError as e: # If there is any HTTPError download_status = 'fail' - download_message = "UnicodeEncodeError on an image...trying next one..." + " Error: " + str(e) + download_message = "HTTPError on an image...trying next one..." + " Error: " + str(e) return_image_name = '' - except HTTPError as e: # If there is any HTTPError - download_status = 'fail' - download_message = "HTTPError on an image...trying next one..." + " Error: " + str(e) - return_image_name = '' - - except URLError as e: - download_status = 'fail' - download_message = "URLError on an image...trying next one..." + " Error: " + str(e) - return_image_name = '' - - except ssl.CertificateError as e: - download_status = 'fail' - download_message = "CertificateError on an image...trying next one..." + " Error: " + str(e) - return_image_name = '' - - except IOError as e: # If there is any IOError - download_status = 'fail' - download_message = "IOError on an image...trying next one..." 
+ " Error: " + str(e) - return_image_name = '' - return download_status,download_message,return_image_name - - -# Finding 'Next Image' from the given raw page -def _get_next_item(s): - start_line = s.find('rg_meta notranslate') - if start_line == -1: # If no links are found then give an error! - end_quote = 0 - link = "no_links" - return link, end_quote - else: - start_line = s.find('class="rg_meta notranslate">') - start_object = s.find('{', start_line + 1) - end_object = s.find('', start_object + 1) - object_raw = str(s[start_object:end_object]) - #remove escape characters based on python version - version = (3, 0) - cur_version = sys.version_info - if cur_version >= version: #python3 - try: - object_decode = bytes(object_raw, "utf-8").decode("unicode_escape") - final_object = json.loads(object_decode) - except: - final_object = "" - else: #python2 - try: - final_object = (json.loads(repair(object_raw))) - except: - final_object = "" - return final_object, end_object - - -# Getting all links with the help of '_images_get_next_image' -def _get_all_items(page,main_directory,dir_name,limit): - items = [] - errorCount = 0 - i = 0 - count = 1 - while count < limit+1: - object, end_content = _get_next_item(page) - if object == "no_links": - break - elif object == "": - page = page[end_content:] - else: - #format the item for readability - object = format_object(object) - if arguments['metadata']: - print("\nImage Metadata" + str(object)) + except URLError as e: + download_status = 'fail' + download_message = "URLError on an image...trying next one..." + " Error: " + str(e) + return_image_name = '' - items.append(object) # Append all the links in the list named 'Links' + except ssl.CertificateError as e: + download_status = 'fail' + download_message = "CertificateError on an image...trying next one..." + " Error: " + str(e) + return_image_name = '' - #download the images - download_status,download_message,return_image_name = download_image(object['image_link'],object['image_format'],main_directory,dir_name,count) - print(download_message) - if download_status == "success": + except IOError as e: # If there is any IOError + download_status = 'fail' + download_message = "IOError on an image...trying next one..." + " Error: " + str(e) + return_image_name = '' + return download_status,download_message,return_image_name - # download image_thumbnails - if arguments['thumbnail']: - download_status, download_message_thumbnail = download_image_thumbnail(object['image_thumbnail_url'],main_directory,dir_name,return_image_name) - print(download_message_thumbnail) - count += 1 + # Finding 'Next Image' from the given raw page + def _get_next_item(self,s): + start_line = s.find('rg_meta notranslate') + if start_line == -1: # If no links are found then give an error! 
+ end_quote = 0 + link = "no_links" + return link, end_quote + else: + start_line = s.find('class="rg_meta notranslate">') + start_object = s.find('{', start_line + 1) + end_object = s.find('', start_object + 1) + object_raw = str(s[start_object:end_object]) + #remove escape characters based on python version + version = (3, 0) + cur_version = sys.version_info + if cur_version >= version: #python3 + try: + object_decode = bytes(object_raw, "utf-8").decode("unicode_escape") + final_object = json.loads(object_decode) + except: + final_object = "" + else: #python2 + try: + final_object = (json.loads(self.repair(object_raw))) + except: + final_object = "" + return final_object, end_object + + + # Getting all links with the help of '_images_get_next_image' + def _get_all_items(self,page,main_directory,dir_name,limit,arguments): + items = [] + errorCount = 0 + i = 0 + count = 1 + while count < limit+1: + object, end_content = self._get_next_item(page) + if object == "no_links": + break + elif object == "": + page = page[end_content:] else: - errorCount += 1 + #format the item for readability + object = self.format_object(object) + if arguments['metadata']: + print("\nImage Metadata: " + str(object)) - #delay param - if arguments['delay']: - time.sleep(int(arguments['delay'])) + items.append(object) # Append all the links in the list named 'Links' - page = page[end_content:] - i += 1 - if count < limit: - print("\n\nUnfortunately all " + str( - limit) + " could not be downloaded because some images were not downloadable. " + str( - count-1) + " is all we got for this search filter!") - return items,errorCount + #download the images + download_status,download_message,return_image_name = self.download_image(object['image_link'],object['image_format'],main_directory,dir_name,count,arguments['print_urls'],arguments['socket_timeout'],arguments['prefix'],arguments['print_size']) + print(download_message) + if download_status == "success": + # download image_thumbnails + if arguments['thumbnail']: + download_status, download_message_thumbnail = self.download_image_thumbnail(object['image_thumbnail_url'],main_directory,dir_name,return_image_name,arguments['print_urls'],arguments['socket_timeout'],arguments['print_size']) + print(download_message_thumbnail) -# Bulk Download -def bulk_download(search_keyword,suffix_keywords,prefix_keywords,limit,main_directory): - # appending a dummy value to Suffix Keywords array if it is blank - if len(suffix_keywords) == 0: - suffix_keywords.append('') + count += 1 + else: + errorCount += 1 - if len(prefix_keywords) == 0: - prefix_keywords.append('') + #delay param + if arguments['delay']: + time.sleep(int(arguments['delay'])) - for pky in prefix_keywords: - for sky in suffix_keywords: # 1.for every suffix keywords - i = 0 - while i < len(search_keyword): # 2.for every main keyword - iteration = "\n" + "Item no.: " + str(i + 1) + " -->" + " Item name = " + str(pky) + str(search_keyword[i] + str(sky)) - print(iteration) - print("Evaluating...") - search_term = pky + search_keyword[i] + sky - dir_name = search_term + ('-' + arguments['color'] if arguments['color'] else '') #sub-directory + page = page[end_content:] + i += 1 + if count < limit: + print("\n\nUnfortunately all " + str( + limit) + " could not be downloaded because some images were not downloadable. 
" + str( + count-1) + " is all we got for this search filter!") + return items,errorCount - create_directories(main_directory,dir_name) #create directories in OS - params = build_url_parameters() #building URL with params + # Bulk Download + def download(self,arguments): - url = build_search_url(search_term,params) #building main search url + #for input coming from other python files + if __name__ != "__main__": + for arg in args_list: + if arg not in arguments: + arguments[arg] = None - if limit < 101: - raw_html = download_page(url) # download page - else: - raw_html = download_extended_page(url) - - print("Starting Download...") - items,errorCount = _get_all_items(raw_html,main_directory,dir_name,limit) #get all image items and download images - - #dumps into a text file - if arguments['extract_metadata']: - try: - if not os.path.exists("logs"): - os.makedirs("logs") - except OSError as e: - print(e) - text_file = open("logs/"+search_keyword[i]+".txt", "w") - text_file.write(json.dumps(items, indent=4, sort_keys=True)) - text_file.close() - - #Related images - if arguments['related_images']: - print("\nGetting list of related keywords...this may take a few moments") - tabs = get_all_tabs(raw_html) - for key, value in tabs.items(): - final_search_term = (search_term + " - " + key) - print("\nNow Downloading - " + final_search_term) - if limit < 101: - new_raw_html = download_page(value) # download page - else: - new_raw_html = download_extended_page(value) - create_directories(main_directory, final_search_term) - _get_all_items(new_raw_html, main_directory, search_term + " - " + key, limit) - - i += 1 - return errorCount + ######Initialization and Validation of user arguments + if arguments['keywords']: + search_keyword = [str(item) for item in arguments['keywords'].split(',')] -#------------- Main Program -------------# -for arguments in records: - #Initialization and Validation of user arguments - if arguments['keywords']: - search_keyword = [str(item) for item in arguments['keywords'].split(',')] + if arguments['keywords_from_file']: + search_keyword = self.keywords_from_file(arguments['keywords_from_file']) - #Initialization and Validation of user arguments - if arguments['keywords_from_file']: - search_keyword = [] - file_name = arguments['keywords_from_file'] - with codecs.open(file_name, 'r', encoding='utf-8-sig') as f: - if '.csv' in file_name: - for line in f: - if line in ['\n', '\r\n']: - pass - else: - search_keyword.append(line.replace('\n', '').replace('\r', '')) - # print(line) - #print(search_keyword) - elif '.txt' in file_name: - for line in f: - if line in ['\n', '\r\n']: - pass - else: - # print line - search_keyword.append(line.replace('\n', '').replace('\r', '')) - #print(search_keyword) - else: - print("Invalid file type: Valid file types are either .txt or .csv \n" - "exiting...") - sys.exit() + # both time and time range should not be allowed in the same query + if arguments['time'] and arguments['time_range']: + raise ValueError('Either time or time range should be used in a query. Both cannot be used at the same time.') + + # Additional words added to keywords + if arguments['suffix_keywords']: + suffix_keywords = [" " + str(sk) for sk in arguments['suffix_keywords'].split(',')] + else: + suffix_keywords = [''] - # both time and time range should not be allowed in the same query - if arguments['time'] and arguments['time_range']: - parser.error('Either time or time range should be used in a query. 
Both cannot be used at the same time.') + # Additional words added to keywords + if arguments['prefix_keywords']: + prefix_keywords = [str(sk) + " " for sk in arguments['prefix_keywords'].split(',')] + else: + prefix_keywords = [''] - #Additional words added to keywords - if arguments['suffix_keywords']: - suffix_keywords = [" " + str(sk) for sk in arguments['suffix_keywords'].split(',')] - else: - suffix_keywords = [] + # Setting limit on number of images to be downloaded + if arguments['limit']: + limit = int(arguments['limit']) + else: + limit = 100 - # Additional words added to keywords - if arguments['prefix_keywords']: - prefix_keywords = [str(sk) + " " for sk in arguments['prefix_keywords'].split(',')] - else: - prefix_keywords = [] + if arguments['url']: + current_time = str(datetime.datetime.now()).split('.')[0] + search_keyword = [current_time.replace(":", "_")] - # Setting limit on number of images to be downloaded - if arguments['limit']: - limit = int(arguments['limit']) - else: - limit = 100 + if arguments['similar_images']: + current_time = str(datetime.datetime.now()).split('.')[0] + search_keyword = [current_time.replace(":", "_")] - if arguments['url']: - current_time = str(datetime.datetime.now()).split('.')[0] - search_keyword = [current_time.replace(":", "_")] + # If single_image or url argument not present then keywords is mandatory argument + if arguments['single_image'] is None and arguments['url'] is None and arguments['similar_images'] is None and \ + arguments['keywords'] is None and arguments['keywords_from_file'] is None: + raise ValueError('Keywords is a required argument!') - if arguments['similar_images']: - current_time = str(datetime.datetime.now()).split('.')[0] - search_keyword = [current_time.replace(":", "_")] + # If this argument is present, set the custom output directory + if arguments['output_directory']: + main_directory = arguments['output_directory'] + else: + main_directory = "downloads" - # If single_image or url argument not present then keywords is mandatory argument - if arguments['single_image'] is None and arguments['url'] is None and arguments['similar_images'] is None and arguments['keywords'] is None and arguments['keywords_from_file'] is None: - parser.error('Keywords is a required argument!') + # Proxy settings + if arguments['proxy']: + os.environ["http_proxy"] = arguments['proxy'] + os.environ["https_proxy"] = arguments['proxy'] + ######Initialization Complete - # If this argument is present, set the custom output directory - if arguments['output_directory']: - main_directory = arguments['output_directory'] - else: - main_directory = "downloads" + for pky in prefix_keywords: + for sky in suffix_keywords: # 1.for every suffix keywords + i = 0 + while i < len(search_keyword): # 2.for every main keyword + iteration = "\n" + "Item no.: " + str(i + 1) + " -->" + " Item name = " + str(pky) + str(search_keyword[i] + str(sky)) + print(iteration) + print("Evaluating...") + search_term = pky + search_keyword[i] + sky + dir_name = search_term + ('-' + arguments['color'] if arguments['color'] else '') #sub-directory - # Set the delay parameter if this argument is present - if arguments['delay']: - try: - delay_time = int(arguments['delay']) - except ValueError: - parser.error('Delay parameter should be an integer!') - else: - delay_time = 0 + self.create_directories(main_directory,dir_name,arguments['thumbnail']) #create directories in OS - if arguments['print_urls']: - print_url = 'yes' - else: - print_url = 'no' + params = 
self.build_url_parameters(arguments) #building URL with params - if arguments['print_size']: - print_size = 'yes' - else: - print_size = 'no' - - if arguments['proxy']: - os.environ["http_proxy"] = arguments['proxy'] - os.environ["https_proxy"] = arguments['proxy'] - #Initialization Complete - - if arguments['single_image']: #Download Single Image using a URL - single_image() - else: # or download multiple images based on keywords/keyphrase search - t0 = time.time() # start the timer - errorCount = bulk_download(search_keyword,suffix_keywords,prefix_keywords,limit,main_directory) - - print("\nEverything downloaded!") - print("Total Errors: " + str(errorCount) + "\n") - t1 = time.time() # stop the timer - total_time = t1 - t0 # Calculating the total time required to crawl, find and download all the links of 60,000 images - print("Total time taken: " + str(total_time) + " Seconds") -#--------End of the main program --------# + url = self.build_search_url(search_term,params,arguments['url'],arguments['similar_images'],arguments['specific_site']) #building main search url + + if limit < 101: + raw_html = self.download_page(url) # download page + else: + raw_html = self.download_extended_page(url,arguments['chromedriver']) + + print("Starting Download...") + items,errorCount = self._get_all_items(raw_html,main_directory,dir_name,limit,arguments) #get all image items and download images + + #dumps into a text file + if arguments['extract_metadata']: + try: + if not os.path.exists("logs"): + os.makedirs("logs") + except OSError as e: + print(e) + text_file = open("logs/"+search_keyword[i]+".txt", "w") + text_file.write(json.dumps(items, indent=4, sort_keys=True)) + text_file.close() + + #Related images + if arguments['related_images']: + print("\nGetting list of related keywords...this may take a few moments") + tabs = self.get_all_tabs(raw_html) + for key, value in tabs.items(): + final_search_term = (search_term + " - " + key) + print("\nNow Downloading - " + final_search_term) + if limit < 101: + new_raw_html = self.download_page(value) # download page + else: + new_raw_html = self.download_extended_page(value,arguments['chromedriver']) + self.create_directories(main_directory, final_search_term,arguments['thumbnail']) + self._get_all_items(new_raw_html, main_directory, search_term + " - " + key, limit,arguments) + + i += 1 + print("\nErrors: " + str(errorCount) + "\n") + return + +#------------- Main Program -------------# +def main(): + records = user_input() + for arguments in records: + + if arguments['single_image']: # Download Single Image using a URL + response = googleimagesdownload() + response.single_image(arguments['single_image']) + else: # or download multiple images based on keywords/keyphrase search + t0 = time.time() # start the timer + response = googleimagesdownload() + response.download(arguments) + + print("\nEverything downloaded!") + t1 = time.time() # stop the timer + total_time = t1 - t0 # Calculating the total time required to crawl, find and download all the links of 60,000 images + print("Total time taken: " + str(total_time) + " Seconds") + +if __name__ == "__main__": + main() # In[ ]: From 15f5f188f23842c788d97ecab32f91b7a5123b3e Mon Sep 17 00:00:00 2001 From: Vasa Date: Wed, 4 Apr 2018 21:19:17 -0700 Subject: [PATCH 33/83] doc update --- README.rst | 56 ++++++++++++++++++++++++++++++++++++++++-------------- setup.py | 4 ++-- 2 files changed, 44 insertions(+), 16 deletions(-) diff --git a/README.rst b/README.rst index c25b2397..bb1625fc 100644 --- a/README.rst +++ 
b/README.rst @@ -11,7 +11,8 @@ Summary ======= This is a command line python program to search keywords/key-phrases on Google Images -and optionally download images to your computer. +and optionally download images to your computer. You can also invoke this script from +another python file. This is a small and ready-to-run program. No dependencies are required to be installed if you would only want to download up to 100 images per keyword. If you would want more than 100 @@ -47,9 +48,9 @@ Manually using CLI Manually using UI Go to the `repo on github `__ ==> Click on 'Clone or Download' ==> Click on 'Download ZIP' and save it on your local disk. - -Usage -===== + +Usage - Using Command Line Interface +==================================== If installed via pip or using CLI, use the following command: @@ -60,11 +61,25 @@ If installed via pip or using CLI, use the following command: If downloaded via the UI, unzip the file downloaded, go to the 'google_images_download' directory and use one of the below commands: .. code-block:: bash - + $ python3 google_images_download.py [Arguments...] OR $ python google_images_download.py [Arguments...] + +Usage - From another python file +================================ + +If you would want to use this library from another python file, you could use it as shown below: + +.. code-block:: python + + from google_images_download import google_images_download + + response = google_images_download.googleimagesdownload() + response.download({}) + + Arguments ========= @@ -128,7 +143,7 @@ Arguments | | | | | | | `Possible values: jpg, gif, png, bmp, svg, webp, ico` | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| color | c | Denotes the color filter that you want to apply to the images. | +| color | co | Denotes the color filter that you want to apply to the images. | | | | | | | | `Possible values: red, orange, yellow, green, teal, blue, purple, pink, white, gray, black, brown` | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ @@ -170,7 +185,8 @@ Arguments | | | | | | | Time is to be specified in seconds. But you can have sub-second times by using decimal points. | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| url | u | Allows you search by image URL. It downloads images from the google images link provided | +| url | u | Allows you search by image when you have the URL from the Google Images page. | +| | | It downloads images from the google images link provided | | | | | | | | If you are searching an image on the browser google images page, simply grab the browser URL and paste it in this parameter | | | | It will download all the images seen on that page. | @@ -185,7 +201,7 @@ Arguments | | | | | | | You can specify the proxy settings in 'IP:Port' format | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| similar_images | si | Reverse Image Search. | +| similar_images | si | Reverse Image Search or 'Search by Image' as it is referred to on Google. | | | | | | | | Searches and downloads images that are similar to the absolute image link/url you provide. 
| +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ @@ -274,6 +290,18 @@ download images based on arguments passed. Examples ======== +- If you are calling this library from another python file, below is the sample code + +.. code-block:: python + + from google_images_download import google_images_download #importing the library + + response = google_images_download.googleimagesdownload() #class instantiation + + arguments = {"keywords":"Polar bears,baloons,Beaches","limit":20,"print_urls":True} #creating list of arguments + response.download(arguments) #passing the arguments to the function + + - If you are passing arguments from a config file, simply pass the config_file argument with name of your JSON file .. code-block:: bash @@ -283,7 +311,7 @@ Examples - Simple example of using keywords and limit arguments .. code-block:: bash - + $ googleimagesdownload --keywords "Polar bears, baloons, Beaches" --limit 20 - Using Suffix Keywords allows you to specify words after the main @@ -292,26 +320,26 @@ Examples ``car red`` and then ``car blue`` .. code-block:: bash - + $ googleimagesdownload --k "car" -sk 'red,blue,white' -l 10 - To use the short hand command .. code-block:: bash - + $ googleimagesdownload -k "Polar bears, baloons, Beaches" -l 20 - To download images with specific image extension/format .. code-block:: bash - + $ googleimagesdownload --keywords "logo" --format svg - To use color filters for the images .. code-block:: bash - - $ googleimagesdownload -k "playground" -l 20 -c red + + $ googleimagesdownload -k "playground" -l 20 -co red - To use non-English keywords for image search diff --git a/setup.py b/setup.py index 3f5f1870..5222b3ef 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from codecs import open from os import path -__version__ = '1.4.4' +__version__ = '2.0.4' here = path.abspath(path.dirname(__file__)) @@ -44,7 +44,7 @@ author_email='hnvasa@gmail.com', entry_points={ 'console_scripts': [ - 'googleimagesdownload = google_images_download.__init__:main' + 'googleimagesdownload = google_images_download.google_images_download:main' ]}, ) From eccc6acc4c644cb5fb3c5a1add95f5d7aac27bad Mon Sep 17 00:00:00 2001 From: Siva Praveen Date: Tue, 10 Apr 2018 10:58:45 +0530 Subject: [PATCH 34/83] `downloads/` added to `.gitignore` (#66) --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index dafb6cb1..3875c936 100644 --- a/.gitignore +++ b/.gitignore @@ -43,3 +43,6 @@ docs/_build # Cookiecutter output/ + +# Downloads +downloads/ From 9e0c215f5ec869e08299fd41a18fe094ba7649a9 Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Thu, 26 Apr 2018 23:39:38 -0600 Subject: [PATCH 35/83] Fixed bug where files are left open (#74) * Fixed bug where files are not closed * Reordered code so urlopen, response.read, and response.close are next to each other * Close response sooner, easier for future error handling --- .../google_images_download.py | 67 ++++++++++++++----- 1 file changed, 51 insertions(+), 16 deletions(-) diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py index a4477dbd..d98e6864 100644 --- a/google_images_download/google_images_download.py +++ b/google_images_download/google_images_download.py @@ -259,19 +259,30 @@ def single_image(self,image_url): pass req = Request(url, headers={ "User-Agent": "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like 
Gecko) Chrome/24.0.1312.27 Safari/537.17"}) + response = urlopen(req, None, 10) + data = response.read() + response.close() + image_name = str(url[(url.rfind('/')) + 1:]) if '?' in image_name: image_name = image_name[:image_name.find('?')] if ".jpg" in image_name or ".gif" in image_name or ".png" in image_name or ".bmp" in image_name or ".svg" in image_name or ".webp" in image_name or ".ico" in image_name: - output_file = open(main_directory + "/" + image_name, 'wb') + file_name = main_directory + "/" + image_name else: - output_file = open(main_directory + "/" + image_name + ".jpg", 'wb') + file_name = main_directory + "/" + image_name + ".jpg" image_name = image_name + ".jpg" - data = response.read() - output_file.write(data) - response.close() + try: + output_file = open(file_name, 'wb') + output_file.write(data) + except OSError as e: + raise e + except IOError as e: + raise e + finally: + output_file.close() + print("completed ====> " + image_name) return @@ -465,14 +476,25 @@ def download_image_thumbnail(self,image_url,main_directory,dir_name,return_image timeout = float(socket_timeout) else: timeout = 10 - response = urlopen(req, None, timeout) - path = main_directory + "/" + dir_name + " - thumbnail" + "/" + return_image_name - output_file = open(path, 'wb') + response = urlopen(req, None, timeout) data = response.read() - output_file.write(data) response.close() + path = main_directory + "/" + dir_name + " - thumbnail" + "/" + return_image_name + + try: + output_file = open(path, 'wb') + output_file.write(data) + except OSError as e: + download_status = 'fail' + download_message = "OSError on an image...trying next one..." + " Error: " + str(e) + except IOError as e: + download_status = 'fail' + download_message = "IOError on an image...trying next one..." + " Error: " + str(e) + finally: + output_file.close() + download_status = 'success' download_message = "Completed Image Thumbnail ====> " + return_image_name @@ -515,7 +537,10 @@ def download_image(self,image_url,image_format,main_directory,dir_name,count,pri timeout = float(socket_timeout) else: timeout = 10 + response = urlopen(req, None, timeout) + data = response.read() + response.close() # keep everything after the last '/' image_name = str(image_url[(image_url.rfind('/')) + 1:]) @@ -536,16 +561,25 @@ def download_image(self,image_url,image_format,main_directory,dir_name,count,pri prefix = '' path = main_directory + "/" + dir_name + "/" + prefix + str(count) + ". " + image_name - output_file = open(path, 'wb') - data = response.read() - output_file.write(data) - response.close() - #return image name back to calling method to use it for thumbnail downloads - return_image_name = prefix + str(count) + ". " + image_name + try: + output_file = open(path, 'wb') + output_file.write(data) + except OSError as e: + download_status = 'fail' + download_message = "OSError on an image...trying next one..." + " Error: " + str(e) + return_image_name = '' + except IOError as e: + download_status = 'fail' + download_message = "IOError on an image...trying next one..." + " Error: " + str(e) + return_image_name = '' + finally: + output_file.close() + #return image name back to calling method to use it for thumbnail downloads download_status = 'success' - download_message = "Completed Image ====> " + prefix + str(count) + ". " + image_name + download_message = "Completed Image ====> " + prefix + str(count) + ". " + image_name + return_image_name = prefix + str(count) + ". 
" + image_name # image size parameter if print_size: @@ -575,6 +609,7 @@ def download_image(self,image_url,image_format,main_directory,dir_name,count,pri download_status = 'fail' download_message = "IOError on an image...trying next one..." + " Error: " + str(e) return_image_name = '' + return download_status,download_message,return_image_name From 00ee06e431d643db47822a4bfd0422ffe874114f Mon Sep 17 00:00:00 2001 From: Vasa Date: Fri, 27 Apr 2018 22:25:04 -0700 Subject: [PATCH 36/83] UnboundLocalError bug when closing file descriptors as per #87 improved disclaimer of edge case of getting less than 100 images as per #82 corrected type in readme file as per #78 --- README.rst | 4 +- .../google_images_download.py | 64 ++++++++++++++----- setup.py | 2 +- 3 files changed, 52 insertions(+), 18 deletions(-) diff --git a/README.rst b/README.rst index bb1625fc..4238d184 100644 --- a/README.rst +++ b/README.rst @@ -20,7 +20,7 @@ images per keyword, then you would need to install ``Selenium`` library along wi Detailed instructions in the troubleshooting section. -Compatability +Compatibility ============= This program is compatible with both the versions of python - 2.x and 3.x (recommended). @@ -129,6 +129,8 @@ Arguments | | | You can specify any integer value here. It will try and get all the images that it finds in the google image search page. | | | | | | | | If this value is not specified, it defaults to 100. | +| | | | +| | | **Note**: In case of occasional errors while downloading images, you could get less than 100 (if the limit is set to 100) | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | related_images | ri | This argument downloads a ton of images related to the keyword you provided. | | | | | diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py index a4477dbd..2724fa04 100644 --- a/google_images_download/google_images_download.py +++ b/google_images_download/google_images_download.py @@ -259,19 +259,29 @@ def single_image(self,image_url): pass req = Request(url, headers={ "User-Agent": "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17"}) + response = urlopen(req, None, 10) + data = response.read() + response.close() + image_name = str(url[(url.rfind('/')) + 1:]) if '?' 
in image_name: image_name = image_name[:image_name.find('?')] if ".jpg" in image_name or ".gif" in image_name or ".png" in image_name or ".bmp" in image_name or ".svg" in image_name or ".webp" in image_name or ".ico" in image_name: - output_file = open(main_directory + "/" + image_name, 'wb') + file_name = main_directory + "/" + image_name else: - output_file = open(main_directory + "/" + image_name + ".jpg", 'wb') + file_name = main_directory + "/" + image_name + ".jpg" image_name = image_name + ".jpg" - data = response.read() - output_file.write(data) - response.close() + try: + output_file = open(file_name, 'wb') + output_file.write(data) + output_file.close() + except OSError as e: + raise e + except IOError as e: + raise e + print("completed ====> " + image_name) return @@ -465,14 +475,24 @@ def download_image_thumbnail(self,image_url,main_directory,dir_name,return_image timeout = float(socket_timeout) else: timeout = 10 - response = urlopen(req, None, timeout) - path = main_directory + "/" + dir_name + " - thumbnail" + "/" + return_image_name - output_file = open(path, 'wb') + response = urlopen(req, None, timeout) data = response.read() - output_file.write(data) response.close() + path = main_directory + "/" + dir_name + " - thumbnail" + "/" + return_image_name + + try: + output_file = open(path, 'wb') + output_file.write(data) + output_file.close() + except OSError as e: + download_status = 'fail' + download_message = "OSError on an image...trying next one..." + " Error: " + str(e) + except IOError as e: + download_status = 'fail' + download_message = "IOError on an image...trying next one..." + " Error: " + str(e) + download_status = 'success' download_message = "Completed Image Thumbnail ====> " + return_image_name @@ -515,7 +535,10 @@ def download_image(self,image_url,image_format,main_directory,dir_name,count,pri timeout = float(socket_timeout) else: timeout = 10 + response = urlopen(req, None, timeout) + data = response.read() + response.close() # keep everything after the last '/' image_name = str(image_url[(image_url.rfind('/')) + 1:]) @@ -536,16 +559,24 @@ def download_image(self,image_url,image_format,main_directory,dir_name,count,pri prefix = '' path = main_directory + "/" + dir_name + "/" + prefix + str(count) + ". " + image_name - output_file = open(path, 'wb') - data = response.read() - output_file.write(data) - response.close() - #return image name back to calling method to use it for thumbnail downloads - return_image_name = prefix + str(count) + ". " + image_name + try: + output_file = open(path, 'wb') + output_file.write(data) + output_file.close() + except OSError as e: + download_status = 'fail' + download_message = "OSError on an image...trying next one..." + " Error: " + str(e) + return_image_name = '' + except IOError as e: + download_status = 'fail' + download_message = "IOError on an image...trying next one..." + " Error: " + str(e) + return_image_name = '' + #return image name back to calling method to use it for thumbnail downloads download_status = 'success' - download_message = "Completed Image ====> " + prefix + str(count) + ". " + image_name + download_message = "Completed Image ====> " + prefix + str(count) + ". " + image_name + return_image_name = prefix + str(count) + ". " + image_name # image size parameter if print_size: @@ -575,6 +606,7 @@ def download_image(self,image_url,image_format,main_directory,dir_name,count,pri download_status = 'fail' download_message = "IOError on an image...trying next one..." 
+ " Error: " + str(e) return_image_name = '' + return download_status,download_message,return_image_name diff --git a/setup.py b/setup.py index 5222b3ef..6071a448 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from codecs import open from os import path -__version__ = '2.0.4' +__version__ = '2.1.0' here = path.abspath(path.dirname(__file__)) From 5c4c68cc16a280bac6b290558d452cf85c4cfab9 Mon Sep 17 00:00:00 2001 From: Vasa Date: Fri, 27 Apr 2018 22:42:49 -0700 Subject: [PATCH 37/83] added the shebang line --- google_images_download/google_images_download.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py index 09275922..27ea682e 100644 --- a/google_images_download/google_images_download.py +++ b/google_images_download/google_images_download.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python # In[ ]: # coding: utf-8 @@ -276,12 +277,11 @@ def single_image(self,image_url): try: output_file = open(file_name, 'wb') output_file.write(data) + output_file.close() except OSError as e: raise e except IOError as e: raise e - finally: - output_file.close() print("completed ====> " + image_name) return @@ -486,14 +486,13 @@ def download_image_thumbnail(self,image_url,main_directory,dir_name,return_image try: output_file = open(path, 'wb') output_file.write(data) + output_file.close() except OSError as e: download_status = 'fail' download_message = "OSError on an image...trying next one..." + " Error: " + str(e) except IOError as e: download_status = 'fail' download_message = "IOError on an image...trying next one..." + " Error: " + str(e) - finally: - output_file.close() download_status = 'success' download_message = "Completed Image Thumbnail ====> " + return_image_name @@ -565,6 +564,7 @@ def download_image(self,image_url,image_format,main_directory,dir_name,count,pri try: output_file = open(path, 'wb') output_file.write(data) + output_file.close() except OSError as e: download_status = 'fail' download_message = "OSError on an image...trying next one..." + " Error: " + str(e) @@ -573,8 +573,6 @@ def download_image(self,image_url,image_format,main_directory,dir_name,count,pri download_status = 'fail' download_message = "IOError on an image...trying next one..." 
+ " Error: " + str(e) return_image_name = '' - finally: - output_file.close() #return image name back to calling method to use it for thumbnail downloads download_status = 'success' @@ -826,4 +824,4 @@ def main(): if __name__ == "__main__": main() -# In[ ]: \ No newline at end of file +# In[ ]: From 6b82fc4155d26d7fbea3989b016cab975d385f06 Mon Sep 17 00:00:00 2001 From: Vasa Date: Fri, 27 Apr 2018 23:30:36 -0700 Subject: [PATCH 38/83] feature to specify the exact size of the image as per #69 --- README.rst | 7 +++++++ google_images_download/google_images_download.py | 15 +++++++++++++-- setup.py | 2 +- 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index 4238d184..40370ee9 100644 --- a/README.rst +++ b/README.rst @@ -167,6 +167,13 @@ Arguments | | | `Possible values: large, medium, icon, >400*300, >640*480, >800*600, >1024*768, >2MP, >4MP, >6MP, >8MP, >10MP, | | | | >12MP, >15MP, >20MP, >40MP, >70MP` | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| exact_size | es | You can specify the exact size/resolution of the images | +| | | | +| | | This value of this argument can be specified as ```` where the fist integer stands for width of the image | +| | | and the second integer stands for the height of the image. For example, ``-es 1024,786`` | +| | | | +| | | **Note**: You cannot specify both 'size' and 'exact_size' arguments in the same query. You can only give one of them. | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | aspect_ratio | a | Denotes the aspect ratio of images to download. 
| | | | | | | | `Possible values: tall, square, wide, panoramic` | diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py index 27ea682e..bd0dba19 100644 --- a/google_images_download/google_images_download.py +++ b/google_images_download/google_images_download.py @@ -30,7 +30,7 @@ args_list = ["keywords", "keywords_from_file", "prefix_keywords", "suffix_keywords", "limit", "related_images", "format", "color", "color_type", "usage_rights", "size", - "aspect_ratio", "type", "time", "time_range", "delay", "url", "single_image", + "exact_size", "aspect_ratio", "type", "time", "time_range", "delay", "url", "single_image", "output_directory", "proxy", "similar_images", "specific_site", "print_urls", "print_size", "metadata", "extract_metadata", "socket_timeout", "thumbnail", "language", "prefix", "chromedriver"] @@ -74,6 +74,7 @@ def user_input(): choices=['labeled-for-reuse-with-modifications','labeled-for-reuse','labeled-for-noncommercial-reuse-with-modification','labeled-for-nocommercial-reuse']) parser.add_argument('-s', '--size', help='image size', type=str, required=False, choices=['large','medium','icon','>400*300','>640*480','>800*600','>1024*768','>2MP','>4MP','>6MP','>8MP','>10MP','>12MP','>15MP','>20MP','>40MP','>70MP']) + parser.add_argument('-es', '--exact_size', help='exact image resolution "WIDTH,HEIGHT"', type=str, required=False) parser.add_argument('-t', '--type', help='image type', type=str, required=False, choices=['face','photo','clip-art','line-drawing','animated']) parser.add_argument('-w', '--time', help='image age', type=str, required=False, @@ -353,6 +354,12 @@ def build_url_parameters(self,arguments): else: time_range = '' + if arguments['exact_size']: + size_array = [x.strip() for x in arguments['exact_size'].split(',')] + exact_size = ",isz:ex,iszw:" + str(size_array[0]) + ",iszh:" + str(size_array[1]) + else: + exact_size = '' + built_url = "&tbs=" counter = 0 params = {'color':[arguments['color'],{'red':'ic:specific,isc:red', 'orange':'ic:specific,isc:orange', 'yellow':'ic:specific,isc:yellow', 'green':'ic:specific,isc:green', 'teal':'ic:specific,isc:teel', 'blue':'ic:specific,isc:blue', 'purple':'ic:specific,isc:purple', 'pink':'ic:specific,isc:pink', 'white':'ic:specific,isc:white', 'gray':'ic:specific,isc:gray', 'black':'ic:specific,isc:black', 'brown':'ic:specific,isc:brown'}], @@ -374,7 +381,7 @@ def build_url_parameters(self,arguments): else: built_url = built_url + ',' + ext_param counter += 1 - built_url = lang_url+built_url+time_range + built_url = lang_url+built_url+exact_size+time_range return built_url @@ -707,6 +714,10 @@ def download(self,arguments): if arguments['time'] and arguments['time_range']: raise ValueError('Either time or time range should be used in a query. Both cannot be used at the same time.') + # both time and time range should not be allowed in the same query + if arguments['size'] and arguments['exact_size']: + raise ValueError('Either "size" or "exact_size" should be used in a query. 
Both cannot be used at the same time.') + # Additional words added to keywords if arguments['suffix_keywords']: suffix_keywords = [" " + str(sk) for sk in arguments['suffix_keywords'].split(',')] diff --git a/setup.py b/setup.py index 6071a448..7552dedf 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from codecs import open from os import path -__version__ = '2.1.0' +__version__ = '2.1.1' here = path.abspath(path.dirname(__file__)) From a182e34891ae0985a8680752b9a6464c6b6e707f Mon Sep 17 00:00:00 2001 From: Vasa Date: Sat, 28 Apr 2018 00:24:17 -0700 Subject: [PATCH 39/83] script now throws a helpful message when keyword argument is not provided --- google_images_download/google_images_download.py | 9 ++++++++- setup.py | 2 +- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py index bd0dba19..1ea997bd 100644 --- a/google_images_download/google_images_download.py +++ b/google_images_download/google_images_download.py @@ -747,7 +747,14 @@ def download(self,arguments): # If single_image or url argument not present then keywords is mandatory argument if arguments['single_image'] is None and arguments['url'] is None and arguments['similar_images'] is None and \ arguments['keywords'] is None and arguments['keywords_from_file'] is None: - raise ValueError('Keywords is a required argument!') + print('-------------------------------\n' + 'Uh oh! Keywords is a required argument \n\n' + 'Please refer to the documentation on guide to writing queries \n' + 'https://github.com/hardikvasa/google-images-download#examples' + '\n\nexiting!\n' + '-------------------------------') + sys.exit() + # If this argument is present, set the custom output directory if arguments['output_directory']: diff --git a/setup.py b/setup.py index 7552dedf..d5179934 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from codecs import open from os import path -__version__ = '2.1.1' +__version__ = '2.1.2' here = path.abspath(path.dirname(__file__)) From c9dc97049fa245f23efeb9f75ca86c5743a27346 Mon Sep 17 00:00:00 2001 From: Julian Harris Date: Sat, 12 May 2018 10:39:56 +0100 Subject: [PATCH 40/83] Print out exception message when attempting to find the Chrome driver (#98) --- google_images_download/google_images_download.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) mode change 100644 => 100755 google_images_download/google_images_download.py diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py old mode 100644 new mode 100755 index 1ea997bd..b32efd95 --- a/google_images_download/google_images_download.py +++ b/google_images_download/google_images_download.py @@ -151,10 +151,10 @@ def download_extended_page(self,url,chromedriver): try: browser = webdriver.Chrome(chromedriver, chrome_options=options) - except: + except Exception as e: print("Looks like we cannot locate the path the 'chromedriver' (use the '--chromedriver' " "argument to specify the path to the executable.) 
or google chrome browser is not " - "installed on your machine") + "installed on your machine (exception: %s)" % e) sys.exit() browser.set_window_size(1024, 768) From 66b5fff0bbe4b777f1936f270106cc37283cf56c Mon Sep 17 00:00:00 2001 From: Vasa Date: Sat, 12 May 2018 15:38:15 -0700 Subject: [PATCH 41/83] the script now returns list of absolute paths of the images downloaded added more clarification on providing path of chromedriver on windows OS, in the documentation --- README.rst | 16 ++++-- .../google_images_download.py | 49 +++++++++++++------ setup.py | 2 +- 3 files changed, 49 insertions(+), 18 deletions(-) diff --git a/README.rst b/README.rst index 40370ee9..cb7be968 100644 --- a/README.rst +++ b/README.rst @@ -77,7 +77,7 @@ If you would want to use this library from another python file, you could use it from google_images_download import google_images_download response = google_images_download.googleimagesdownload() - response.download({}) + absolute_image_paths = response.download({}) Arguments @@ -226,6 +226,12 @@ Arguments | | | | | | | This argument does not take any value. Just add '--print_size' or '-ps' in your query. | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| print_paths | pp | Prints the list of all the absolute paths of the downloaded images | +| | | | +| | | When calling the script from another python file, this list will be saved in a variable (as shown in the example below) | +| | | | +| | | This argument also allows you to print the list on the console | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | metadata | m | Prints the metada of the image on the console. | | | | | | | | This includes image size, origin, image attributes, description, image URL, etc. | @@ -308,8 +314,8 @@ Examples response = google_images_download.googleimagesdownload() #class instantiation arguments = {"keywords":"Polar bears,baloons,Beaches","limit":20,"print_urls":True} #creating list of arguments - response.download(arguments) #passing the arguments to the function - + paths = response.download(arguments) #passing the arguments to the function + print(paths) #printing absolute paths of the downloaded images - If you are passing arguments from a config file, simply pass the config_file argument with name of your JSON file @@ -475,6 +481,10 @@ If you have pip installed the library or run the setup.py file, Selenium would h On **Windows** or **MAC** if for some reason the chromedriver gives you trouble, download it under the current directory and run the command. 
+On windows however, the path to chromedriver has to be given in the following format: + +'C:\\complete\\path\\to\\chromedriver.exe' + On **Linux** if you are having issues installing google chrome browser, refer to this `CentOS or Amazon Linux Guide `__ or `Ubuntu Guide `__ diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py index b32efd95..3b5220a2 100755 --- a/google_images_download/google_images_download.py +++ b/google_images_download/google_images_download.py @@ -13,12 +13,15 @@ from urllib.request import Request, urlopen from urllib.request import URLError, HTTPError from urllib.parse import quote - import html + import http.client + http.client._MAXHEADERS = 1000 else: # If the Current Version of Python is 2.x import urllib2 from urllib2 import Request, urlopen from urllib2 import URLError, HTTPError from urllib import quote + import httplib + httplib._MAXHEADERS = 1000 import time # Importing the time library to check the time of code execution import os import argparse @@ -27,12 +30,14 @@ import json import re import codecs +import socket args_list = ["keywords", "keywords_from_file", "prefix_keywords", "suffix_keywords", "limit", "related_images", "format", "color", "color_type", "usage_rights", "size", "exact_size", "aspect_ratio", "type", "time", "time_range", "delay", "url", "single_image", "output_directory", "proxy", "similar_images", "specific_site", "print_urls", "print_size", - "metadata", "extract_metadata", "socket_timeout", "thumbnail", "language", "prefix", "chromedriver"] + "print_paths", "metadata", "extract_metadata", "socket_timeout", "thumbnail", "language", + "prefix", "chromedriver"] def user_input(): @@ -86,6 +91,7 @@ def user_input(): parser.add_argument('-ss', '--specific_site', help='downloads images that are indexed from a specific website', type=str, required=False) parser.add_argument('-p', '--print_urls', default=False, help="Print the URLs of the images", action="store_true") parser.add_argument('-ps', '--print_size', default=False, help="Print the size of the images on disk", action="store_true") + parser.add_argument('-pp', '--print_paths', default=False, help="Prints the list of absolute paths of the images",action="store_true") parser.add_argument('-m', '--metadata', default=False, help="Print the metadata of the image", action="store_true") parser.add_argument('-e', '--extract_metadata', default=False, help="Dumps all the logs into a text file", action="store_true") parser.add_argument('-st', '--socket_timeout', default=False, help="Connection timeout waiting for the image to download", type=float) @@ -279,10 +285,10 @@ def single_image(self,image_url): output_file = open(file_name, 'wb') output_file.write(data) output_file.close() - except OSError as e: - raise e except IOError as e: raise e + except OSError as e: + raise e print("completed ====> " + image_name) return @@ -572,14 +578,12 @@ def download_image(self,image_url,image_format,main_directory,dir_name,count,pri output_file = open(path, 'wb') output_file.write(data) output_file.close() + absolute_path = os.path.abspath(path) except OSError as e: download_status = 'fail' download_message = "OSError on an image...trying next one..." + " Error: " + str(e) return_image_name = '' - except IOError as e: - download_status = 'fail' - download_message = "IOError on an image...trying next one..." 
+ " Error: " + str(e) - return_image_name = '' + absolute_path = '' #return image name back to calling method to use it for thumbnail downloads download_status = 'success' @@ -594,28 +598,39 @@ def download_image(self,image_url,image_format,main_directory,dir_name,count,pri download_status = 'fail' download_message = "UnicodeEncodeError on an image...trying next one..." + " Error: " + str(e) return_image_name = '' + absolute_path = '' + + except URLError as e: + download_status = 'fail' + download_message = "URLError on an image...trying next one..." + " Error: " + str(e) + return_image_name = '' + absolute_path = '' except HTTPError as e: # If there is any HTTPError download_status = 'fail' download_message = "HTTPError on an image...trying next one..." + " Error: " + str(e) return_image_name = '' + absolute_path = '' except URLError as e: download_status = 'fail' download_message = "URLError on an image...trying next one..." + " Error: " + str(e) return_image_name = '' + absolute_path = '' except ssl.CertificateError as e: download_status = 'fail' download_message = "CertificateError on an image...trying next one..." + " Error: " + str(e) return_image_name = '' + absolute_path = '' except IOError as e: # If there is any IOError download_status = 'fail' download_message = "IOError on an image...trying next one..." + " Error: " + str(e) return_image_name = '' + absolute_path = '' - return download_status,download_message,return_image_name + return download_status,download_message,return_image_name,absolute_path # Finding 'Next Image' from the given raw page @@ -650,6 +665,7 @@ def _get_next_item(self,s): # Getting all links with the help of '_images_get_next_image' def _get_all_items(self,page,main_directory,dir_name,limit,arguments): items = [] + abs_path = [] errorCount = 0 i = 0 count = 1 @@ -668,7 +684,7 @@ def _get_all_items(self,page,main_directory,dir_name,limit,arguments): items.append(object) # Append all the links in the list named 'Links' #download the images - download_status,download_message,return_image_name = self.download_image(object['image_link'],object['image_format'],main_directory,dir_name,count,arguments['print_urls'],arguments['socket_timeout'],arguments['prefix'],arguments['print_size']) + download_status,download_message,return_image_name,absolute_path = self.download_image(object['image_link'],object['image_format'],main_directory,dir_name,count,arguments['print_urls'],arguments['socket_timeout'],arguments['prefix'],arguments['print_size']) print(download_message) if download_status == "success": @@ -678,6 +694,7 @@ def _get_all_items(self,page,main_directory,dir_name,limit,arguments): print(download_message_thumbnail) count += 1 + abs_path.append(absolute_path) else: errorCount += 1 @@ -691,7 +708,7 @@ def _get_all_items(self,page,main_directory,dir_name,limit,arguments): print("\n\nUnfortunately all " + str( limit) + " could not be downloaded because some images were not downloadable. 
" + str( count-1) + " is all we got for this search filter!") - return items,errorCount + return items,errorCount,abs_path # Bulk Download @@ -768,6 +785,7 @@ def download(self,arguments): os.environ["https_proxy"] = arguments['proxy'] ######Initialization Complete + paths = {} for pky in prefix_keywords: for sky in suffix_keywords: # 1.for every suffix keywords i = 0 @@ -790,7 +808,8 @@ def download(self,arguments): raw_html = self.download_extended_page(url,arguments['chromedriver']) print("Starting Download...") - items,errorCount = self._get_all_items(raw_html,main_directory,dir_name,limit,arguments) #get all image items and download images + items,errorCount,abs_path = self._get_all_items(raw_html,main_directory,dir_name,limit,arguments) #get all image items and download images + paths[pky + search_keyword[i] + sky] = abs_path #dumps into a text file if arguments['extract_metadata']: @@ -819,7 +838,9 @@ def download(self,arguments): i += 1 print("\nErrors: " + str(errorCount) + "\n") - return + if arguments['print_paths']: + print(paths) + return paths #------------- Main Program -------------# def main(): @@ -832,7 +853,7 @@ def main(): else: # or download multiple images based on keywords/keyphrase search t0 = time.time() # start the timer response = googleimagesdownload() - response.download(arguments) + paths = response.download(arguments) #wrapping response in a variable just for consistency print("\nEverything downloaded!") t1 = time.time() # stop the timer diff --git a/setup.py b/setup.py index d5179934..cd681434 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from codecs import open from os import path -__version__ = '2.1.2' +__version__ = '2.2.0' here = path.abspath(path.dirname(__file__)) From f88c763179f79187e816636710da28cfd77fd004 Mon Sep 17 00:00:00 2001 From: Vasa Date: Sat, 12 May 2018 15:41:32 -0700 Subject: [PATCH 42/83] the script now returns list of absolute paths of the images downloaded added more clarification on providing path of chromedriver on windows OS, in the documentation --- README.rst | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index cb7be968..8e0d6867 100644 --- a/README.rst +++ b/README.rst @@ -483,7 +483,7 @@ On **Windows** or **MAC** if for some reason the chromedriver gives you trouble, On windows however, the path to chromedriver has to be given in the following format: -'C:\\complete\\path\\to\\chromedriver.exe' +`C:\\complete\\path\\to\\chromedriver.exe` On **Linux** if you are having issues installing google chrome browser, refer to this `CentOS or Amazon Linux Guide `__ or `Ubuntu Guide `__ diff --git a/setup.py b/setup.py index cd681434..5264bab2 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from codecs import open from os import path -__version__ = '2.2.0' +__version__ = '2.2.1' here = path.abspath(path.dirname(__file__)) From 35f8bf7446d16809632e24520a41bb001c3bb04a Mon Sep 17 00:00:00 2001 From: Vasa Date: Sat, 12 May 2018 15:42:50 -0700 Subject: [PATCH 43/83] the script now returns list of absolute paths of the images downloaded added more clarification on providing path of chromedriver on windows OS, in the documentation --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 8e0d6867..39b4cbc7 100644 --- a/README.rst +++ b/README.rst @@ -483,7 +483,7 @@ On **Windows** or **MAC** if for some reason the chromedriver gives you trouble, On windows however, the path to chromedriver has to be given in the 
following format:

-`C:\\complete\\path\\to\\chromedriver.exe`
+``C:\\complete\\path\\to\\chromedriver.exe``

On **Linux** if you are having issues installing google chrome browser, refer to this `CentOS or Amazon Linux Guide `__ or `Ubuntu Guide `__

From abb155e835b085cce3b88689c8ac4b60bccfc64e Mon Sep 17 00:00:00 2001
From: Vasa
Date: Mon, 14 May 2018 11:44:27 -0700
Subject: [PATCH 44/83] feature to specify the image directory feature to not specify any image directory

---
 README.rst | 12 ++++++++++++
 .../google_images_download.py | 22 ++++++++++++++------
 setup.py | 2 +-
 3 files changed, 30 insertions(+), 6 deletions(-)

diff --git a/README.rst b/README.rst
index 39b4cbc7..239c8566 100644
--- a/README.rst
+++ b/README.rst
@@ -205,6 +205,18 @@ Arguments
| output_directory | o | Allows you specify the main directory name in which the images are downloaded. |
| | | |
| | | If not specified, it will default to 'downloads' directory. This directory is located in the path from where you run this code|
+| | | |
+| | | The directory structure would look like: |
++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+
+| image_directory | i | This lets you specify a directory inside of the main directory (output_directory) in which the images will be saved |
+| | | |
+| | | If not specified, it will default to the name of the keyword. |
+| | | |
+| | | The directory structure would look like: |
++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+
+| no_directory | n | This option allows you to download images directly in the main directory (output_directory) without an image_directory |
+| | | |
+| | | The directory structure would look like: |
+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+
| proxy | px | Allows you to specify proxy server setting for all your requests |
| | | |
diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py
index 3b5220a2..500fd943 100755
--- a/google_images_download/google_images_download.py
+++ b/google_images_download/google_images_download.py
@@ -35,9 +35,9 @@
 args_list = ["keywords", "keywords_from_file", "prefix_keywords", "suffix_keywords",
              "limit", "related_images", "format", "color", "color_type", "usage_rights", "size",
              "exact_size", "aspect_ratio", "type", "time", "time_range", "delay", "url", "single_image",
-             "output_directory", "proxy", "similar_images", "specific_site", "print_urls", "print_size",
-             "print_paths", "metadata", "extract_metadata", "socket_timeout", "thumbnail", "language",
-             "prefix", "chromedriver"]
+             "output_directory", "image_directory", "no_directory", "proxy", "similar_images", "specific_site",
+             "print_urls", "print_size", "print_paths", "metadata", "extract_metadata", "socket_timeout",
+             "thumbnail", "language", "prefix", "chromedriver"]

 def user_input():
@@ -69,7 +69,9 @@ def user_input():
                         choices=['jpg', 'gif', 'png', 'bmp', 'svg', 'webp', 'ico'])
     parser.add_argument('-u', '--url', help='search with google image URL', type=str, required=False)
     parser.add_argument('-x', '--single_image', help='downloading a single image from URL', type=str, required=False)
-    parser.add_argument('-o', '--output_directory', help='download images in a specific directory', type=str, required=False)
+    parser.add_argument('-o', '--output_directory', help='download images in a specific main directory', type=str, required=False)
+    parser.add_argument('-i', '--image_directory', help='download images in a specific sub-directory', type=str, required=False)
+    parser.add_argument('-n', '--no_directory', default=False, help='download images in the main directory but no sub-directory', action="store_true")
     parser.add_argument('-d', '--delay', help='delay in seconds to wait between downloading two images', type=int, required=False)
     parser.add_argument('-co', '--color', help='filter on color', type=str, required=False, choices=['red', 'orange', 'yellow', 'green', 'teal', 'blue', 'purple', 'pink', 'white', 'gray', 'black', 'brown'])
@@ -735,6 +737,10 @@ def download(self,arguments):
         if arguments['size'] and arguments['exact_size']:
             raise ValueError('Either "size" or "exact_size" should be used in a query. Both cannot be used at the same time.')
+        # both image directory and no image directory should not be allowed in the same query
+        if arguments['image_directory'] and arguments['no_directory']:
+            raise ValueError('You can either specify image directory or specify no image directory, not both!')
+
         # Additional words added to keywords
         if arguments['suffix_keywords']:
             suffix_keywords = [" " + str(sk) for sk in arguments['suffix_keywords'].split(',')]
@@ -794,7 +800,13 @@ def download(self,arguments):
                     print(iteration)
                     print("Evaluating...")
                     search_term = pky + search_keyword[i] + sky
-                    dir_name = search_term + ('-' + arguments['color'] if arguments['color'] else '') #sub-directory
+
+                    if arguments['image_directory']:
+                        dir_name = arguments['image_directory']
+                    elif arguments['no_directory']:
+                        dir_name = ''
+                    else:
+                        dir_name = search_term + ('-' + arguments['color'] if arguments['color'] else '') #sub-directory

                     self.create_directories(main_directory,dir_name,arguments['thumbnail']) #create directories in OS

diff --git a/setup.py b/setup.py
index 5264bab2..f3a4eb06 100644
--- a/setup.py
+++ b/setup.py
@@ -2,7 +2,7 @@
 from codecs import open
 from os import path

-__version__ = '2.2.1'
+__version__ = '2.2.2'

 here = path.abspath(path.dirname(__file__))

From 68f8bf723506d210fd8675aa92fb007f801c43df Mon Sep 17 00:00:00 2001
From: Vasa
Date: Mon, 14 May 2018 11:46:03 -0700
Subject: [PATCH 45/83] feature to specify the image directory feature to not specify any image directory

---
 README.rst | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/README.rst b/README.rst
index 239c8566..3e03962e 100644
--- a/README.rst
+++ b/README.rst
@@ -206,17 +206,17 @@ Arguments
| | | |
| | | If not specified, it will default to 'downloads' directory. This directory is located in the path from where you run this code|
| | | |
-| | | The directory structure would look like: |
+| | | The directory structure would look like: ```` |
+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+
| image_directory | i | This lets you specify a directory inside of the main directory (output_directory) in which the images will be saved |
| | | |
| | | If not specified, it will default to the name of the keyword.
| | | | | -| | | The directory structure would look like: | +| | | The directory structure would look like: ```` | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | no_directory | n | This option allows you download images directly in the main directory (output_directory) without an image_directory | | | | | -| | | The directory structure would look like: | +| | | The directory structure would look like: ```` | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | proxy | px | Allows you to specify proxy server setting for all your requests | | | | | From 0f1b04c49ad6ad38f4271f0a47ab22803ed10a3d Mon Sep 17 00:00:00 2001 From: amitaisopher Date: Mon, 14 May 2018 23:04:13 +0300 Subject: [PATCH 46/83] Adding test validating image download to default location (#92) --- tests/test_google_images_download.py | 53 ++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 tests/test_google_images_download.py diff --git a/tests/test_google_images_download.py b/tests/test_google_images_download.py new file mode 100644 index 00000000..ec62afd0 --- /dev/null +++ b/tests/test_google_images_download.py @@ -0,0 +1,53 @@ +from google_images_download import google_images_download +import os, errno +import time + + +def silent_remove_of_file(file): + try: + os.remove(file) + except OSError as e: + if e.errno != errno.ENOENT: + raise e + return False + return True + + +def test_download_images_to_default_location(): + start_time = time.time() + argumnets = { + "keywords": "Polar bears", + "limit": 5, + "print_urls": False + } + try: + temp = argumnets['output_folder'] + except KeyError: + pass + else: + assert False, "This test checks download to default location yet an output folder was provided" + + output_folder_path = os.path.join(os.path.realpath('.'), 'downloads', '{}'.format(argumnets['keywords'])) + if os.path.exists(output_folder_path): + start_amount_of_files_in_output_folder = len([name for name in os.listdir(output_folder_path) if os.path.isfile(os.path.join(output_folder_path, name)) and os.path.getctime(os.path.join(output_folder_path, name)) < start_time]) + else: + start_amount_of_files_in_output_folder = 0 + + response = google_images_download.googleimagesdownload() + response.download(argumnets) + files_modified_after_test_started = [name for name in os.listdir(output_folder_path) if os.path.isfile(os.path.join(output_folder_path, name)) and os.path.getmtime(os.path.join(output_folder_path, name)) > start_time] + end_amount_of_files_in_output_folder = len(files_modified_after_test_started) + print(f"Files downloaded by test {__name__}:") + for file in files_modified_after_test_started: + print(os.path.join(output_folder_path, file)) + + + # assert end_amount_of_files_in_output_folder - start_amount_of_files_in_output_folder == argumnets['limit'] + assert end_amount_of_files_in_output_folder == argumnets['limit'] + + print(f"Cleaning up all files downloaded by test {__name__}...") + for file in files_modified_after_test_started: + if silent_remove_of_file(os.path.join(output_folder_path, file)): + print(f"Deleted {os.path.join(output_folder_path, file)}") + else: + print(f"Failed to delete {os.path.join(output_folder_path, file)}") \ No newline at end of file From 0ec21c75ce1c7d541f8b78ac6a0537dd0f0757eb Mon Sep 17 00:00:00 2001 From: Vasa Date: 
Tue, 15 May 2018 21:29:55 -0700 Subject: [PATCH 47/83] feature to turn on safe search filter as in #99 resolved the bug where time_range filter was ignored as in #93 made some improvements on the IncompleteReadException as in #83 option to exclude numbered ordering in image names as in #100 --- README.rst | 12 ++++++ .../google_images_download.py | 38 ++++++++++++++----- setup.py | 2 +- 3 files changed, 42 insertions(+), 10 deletions(-) diff --git a/README.rst b/README.rst index 3e03962e..f0a19b1d 100644 --- a/README.rst +++ b/README.rst @@ -280,6 +280,18 @@ Arguments | | | | | | | The path looks like this: "path/to/chromedriver". In windows it will be "path/to/chromedriver.exe" | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| safe_search | sa | Searches for images with the Safe Search filter On | +| | | | +| | | And this filter will be Off by default if you do not specify the safe_search argument | +| | | | +| | | This argument does not take any value. Just add '--safe_search' or '-sa' in your query. | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| no_numbering | nn | When you specify this argument, the script does not add ordered numbering as prefix to the images it downloads | +| | | | +| | | If this argument is not specified, the images are numbered in order in which they are downloaded | +| | | | +| | | This argument does not take any value. Just add '--no_numbering' or '-nn' in your query. | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | help | h | show the help message regarding the usage of the above arguments | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py index 500fd943..96522566 100755 --- a/google_images_download/google_images_download.py +++ b/google_images_download/google_images_download.py @@ -14,6 +14,7 @@ from urllib.request import URLError, HTTPError from urllib.parse import quote import http.client + from http.client import IncompleteRead http.client._MAXHEADERS = 1000 else: # If the Current Version of Python is 2.x import urllib2 @@ -21,6 +22,7 @@ from urllib2 import URLError, HTTPError from urllib import quote import httplib + from httplib import IncompleteRead httplib._MAXHEADERS = 1000 import time # Importing the time library to check the time of code execution import os @@ -33,11 +35,11 @@ import socket args_list = ["keywords", "keywords_from_file", "prefix_keywords", "suffix_keywords", - "limit", "related_images", "format", "color", "color_type", "usage_rights", "size", + "limit", "format", "color", "color_type", "usage_rights", "size", "exact_size", "aspect_ratio", "type", "time", "time_range", "delay", "url", "single_image", "output_directory", "image_directory", "no_directory", "proxy", "similar_images", "specific_site", "print_urls", "print_size", "print_paths", "metadata", "extract_metadata", "socket_timeout", - "thumbnail", "language", "prefix", "chromedriver"] + "thumbnail", "language", "prefix", "chromedriver", "related_images", "safe_search", "no_numbering"] def user_input(): 
@@ -104,6 +106,8 @@ def user_input(): parser.add_argument('-px', '--proxy', help='specify a proxy address and port', type=str, required=False) parser.add_argument('-cd', '--chromedriver', help='specify the path to chromedriver executable in your local machine', type=str, required=False) parser.add_argument('-ri', '--related_images', default=False, help="Downloads images that are similar to the keyword provided", action="store_true") + parser.add_argument('-sa', '--safe_search', default=False, help="Turns on the safe search filter while searching for images", action="store_true") + parser.add_argument('-nn', '--no_numbering', default=False, help="Allows you to exclude the default numbering of images", action="store_true") args = parser.parse_args() arguments = vars(args) @@ -358,7 +362,7 @@ def build_url_parameters(self,arguments): if arguments['time_range']: json_acceptable_string = arguments['time_range'].replace("'", "\"") d = json.loads(json_acceptable_string) - time_range = '&cdr:1,cd_min:' + d['time_min'] + ',cd_max:' + d['time_min'] + time_range = ',cdr:1,cd_min:' + d['time_min'] + ',cd_max:' + d['time_min'] else: time_range = '' @@ -394,7 +398,9 @@ def build_url_parameters(self,arguments): #building main search URL - def build_search_url(self,search_term,params,url,similar_images,specific_site): + def build_search_url(self,search_term,params,url,similar_images,specific_site,safe_search): + #check safe_search + safe_search_string = "&safe=active" # check the args and choose the URL if url: url = url @@ -408,7 +414,12 @@ def build_search_url(self,search_term,params,url,similar_images,specific_site): else: url = 'https://www.google.com/search?q=' + quote( search_term) + '&espv=2&biw=1366&bih=667&site=webhp&source=lnms&tbm=isch' + params + '&sa=X&ei=XosDVaCXD8TasATItgE&ved=0CAcQ_AUoAg' - #print(url) + + #safe search check + if safe_search: + url = url + safe_search_string + + # print(url) return url @@ -539,7 +550,7 @@ def download_image_thumbnail(self,image_url,main_directory,dir_name,return_image # Download Images - def download_image(self,image_url,image_format,main_directory,dir_name,count,print_urls,socket_timeout,prefix,print_size): + def download_image(self,image_url,image_format,main_directory,dir_name,count,print_urls,socket_timeout,prefix,print_size,no_numbering): if print_urls: print("Image URL: " + image_url) try: @@ -574,7 +585,10 @@ def download_image(self,image_url,image_format,main_directory,dir_name,count,pri else: prefix = '' - path = main_directory + "/" + dir_name + "/" + prefix + str(count) + ". " + image_name + if no_numbering: + path = main_directory + "/" + dir_name + "/" + prefix + image_name + else: + path = main_directory + "/" + dir_name + "/" + prefix + str(count) + ". " + image_name try: output_file = open(path, 'wb') @@ -632,6 +646,12 @@ def download_image(self,image_url,image_format,main_directory,dir_name,count,pri return_image_name = '' absolute_path = '' + except IncompleteRead as e: + download_status = 'fail' + download_message = "IncompleteReadError on an image...trying next one..." 
+ " Error: " + str(e) + return_image_name = '' + absolute_path = '' + return download_status,download_message,return_image_name,absolute_path @@ -686,7 +706,7 @@ def _get_all_items(self,page,main_directory,dir_name,limit,arguments): items.append(object) # Append all the links in the list named 'Links' #download the images - download_status,download_message,return_image_name,absolute_path = self.download_image(object['image_link'],object['image_format'],main_directory,dir_name,count,arguments['print_urls'],arguments['socket_timeout'],arguments['prefix'],arguments['print_size']) + download_status,download_message,return_image_name,absolute_path = self.download_image(object['image_link'],object['image_format'],main_directory,dir_name,count,arguments['print_urls'],arguments['socket_timeout'],arguments['prefix'],arguments['print_size'],arguments['no_numbering']) print(download_message) if download_status == "success": @@ -812,7 +832,7 @@ def download(self,arguments): params = self.build_url_parameters(arguments) #building URL with params - url = self.build_search_url(search_term,params,arguments['url'],arguments['similar_images'],arguments['specific_site']) #building main search url + url = self.build_search_url(search_term,params,arguments['url'],arguments['similar_images'],arguments['specific_site'],arguments['safe_search']) #building main search url if limit < 101: raw_html = self.download_page(url) # download page diff --git a/setup.py b/setup.py index f3a4eb06..c725c937 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from codecs import open from os import path -__version__ = '2.2.2' +__version__ = '2.3.0' here = path.abspath(path.dirname(__file__)) From d05d0614e6843c7467d36a41630f02e2ce9ce339 Mon Sep 17 00:00:00 2001 From: neon_cyan Date: Sat, 11 Aug 2018 21:03:29 +0100 Subject: [PATCH 48/83] Added a sample config file and made metadata export to a json (#132) * Added sample config & export to json instead of txt * Fixed typo * Added logs to .gitignore --- .gitignore | 3 +++ google_images_download/google_images_download.py | 8 ++++---- google_images_download/sample_config.json | 16 ++++++++++++++++ 3 files changed, 23 insertions(+), 4 deletions(-) create mode 100644 google_images_download/sample_config.json diff --git a/.gitignore b/.gitignore index 3875c936..bfbb05d0 100644 --- a/.gitignore +++ b/.gitignore @@ -46,3 +46,6 @@ output/ # Downloads downloads/ + +# Logs +logs/ diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py index 96522566..045191b2 100755 --- a/google_images_download/google_images_download.py +++ b/google_images_download/google_images_download.py @@ -843,16 +843,16 @@ def download(self,arguments): items,errorCount,abs_path = self._get_all_items(raw_html,main_directory,dir_name,limit,arguments) #get all image items and download images paths[pky + search_keyword[i] + sky] = abs_path - #dumps into a text file + #dumps into a json file if arguments['extract_metadata']: try: if not os.path.exists("logs"): os.makedirs("logs") except OSError as e: print(e) - text_file = open("logs/"+search_keyword[i]+".txt", "w") - text_file.write(json.dumps(items, indent=4, sort_keys=True)) - text_file.close() + json_file = open("logs/"+search_keyword[i]+".json", "w") + json.dump(items, json_file, indent=4, sort_keys=True) + json_file.close() #Related images if arguments['related_images']: diff --git a/google_images_download/sample_config.json b/google_images_download/sample_config.json new file mode 100644 index 
00000000..b259c9bb
--- /dev/null
+++ b/google_images_download/sample_config.json
@@ -0,0 +1,16 @@
+{
+    "Records": [
+        {
+            "keywords": "apple",
+            "limit": 5,
+            "color": "green",
+            "print_urls": true
+        },
+        {
+            "keywords": "universe",
+            "limit": 15,
+            "size": "large",
+            "print_urls": true
+        }
+    ]
+}

From eff8aa8a968203692c3cbe18a961909991c039f4 Mon Sep 17 00:00:00 2001
From: buddydvd
Date: Sat, 11 Aug 2018 14:14:24 -0700
Subject: [PATCH 49/83] For --extract_metadata, skip failed downloads and include saved filename (#104)

* For --extract_metadata, exclude failed downloads
* For --extract_metadata, include saved image filename
---
 google_images_download/google_images_download.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py
index 045191b2..794d5874 100755
--- a/google_images_download/google_images_download.py
+++ b/google_images_download/google_images_download.py
@@ -703,8 +703,6 @@ def _get_all_items(self,page,main_directory,dir_name,limit,arguments):
                 if arguments['metadata']:
                     print("\nImage Metadata: " + str(object))

-                items.append(object) # Append all the links in the list named 'Links'
                 #download the images
                 download_status,download_message,return_image_name,absolute_path = self.download_image(object['image_link'],object['image_format'],main_directory,dir_name,count,arguments['print_urls'],arguments['socket_timeout'],arguments['prefix'],arguments['print_size'],arguments['no_numbering'])
                 print(download_message)
@@ -716,6 +714,8 @@ def _get_all_items(self,page,main_directory,dir_name,limit,arguments):
                     count += 1
+                    object['image_filename'] = return_image_name
+                    items.append(object) # Append all the links in the list named 'Links'
                     abs_path.append(absolute_path)
                 else:
                     errorCount += 1

From a7d7d1b3e8f58a2241517269241a9749236c372d Mon Sep 17 00:00:00 2001
From: Vasa
Date: Fri, 17 Aug 2018 19:11:21 -0700
Subject: [PATCH 50/83] Offset downloading from #116 thanks to @leberknecht for the feature

---
 README.rst | 6 ++++++
 google_images_download/google_images_download.py | 7 ++++++-
 setup.py | 2 +-
 3 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/README.rst b/README.rst
index f0a19b1d..8d140352 100644
--- a/README.rst
+++ b/README.rst
@@ -292,6 +292,12 @@ Arguments
| | | |
| | | This argument does not take any value. Just add '--no_numbering' or '-nn' in your query. |
+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+
+| offset | of | When you specify this argument, it will skip the offset number of links before it starts downloading images |
+| | | |
+| | | If this argument is not specified, the script will start downloading from the first link until the limit is reached |
+| | | |
+| | | This argument takes an integer.
Make sure the value of this argument is less than the value of limit | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | help | h | show the help message regarding the usage of the above arguments | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py index 794d5874..5031eee0 100755 --- a/google_images_download/google_images_download.py +++ b/google_images_download/google_images_download.py @@ -39,7 +39,8 @@ "exact_size", "aspect_ratio", "type", "time", "time_range", "delay", "url", "single_image", "output_directory", "image_directory", "no_directory", "proxy", "similar_images", "specific_site", "print_urls", "print_size", "print_paths", "metadata", "extract_metadata", "socket_timeout", - "thumbnail", "language", "prefix", "chromedriver", "related_images", "safe_search", "no_numbering"] + "thumbnail", "language", "prefix", "chromedriver", "related_images", "safe_search", "no_numbering", + "offset"] def user_input(): @@ -108,6 +109,7 @@ def user_input(): parser.add_argument('-ri', '--related_images', default=False, help="Downloads images that are similar to the keyword provided", action="store_true") parser.add_argument('-sa', '--safe_search', default=False, help="Turns on the safe search filter while searching for images", action="store_true") parser.add_argument('-nn', '--no_numbering', default=False, help="Allows you to exclude the default numbering of images", action="store_true") + parser.add_argument('-of', '--offset', help="Where to start in the fetched links", type=str, required=False) args = parser.parse_args() arguments = vars(args) @@ -697,6 +699,9 @@ def _get_all_items(self,page,main_directory,dir_name,limit,arguments): break elif object == "": page = page[end_content:] + elif arguments['offset'] and count < int(arguments['offset']): + count += 1 + page = page[end_content:] else: #format the item for readability object = self.format_object(object) diff --git a/setup.py b/setup.py index c725c937..fe28fdf9 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from codecs import open from os import path -__version__ = '2.3.0' +__version__ = '2.4.0' here = path.abspath(path.dirname(__file__)) From c81970006cd765905cdbb2b21b7b4cfb75285dbf Mon Sep 17 00:00:00 2001 From: Vasa Date: Sat, 18 Aug 2018 16:37:45 -0700 Subject: [PATCH 51/83] doc changes required as per #110, #120, #130, #132, #73 --- README.rst | 20 +++++++++++--------- setup.py | 2 +- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/README.rst b/README.rst index 8d140352..42a2bb3b 100644 --- a/README.rst +++ b/README.rst @@ -250,7 +250,7 @@ Arguments | | | | | | | This argument does not take any value. Just add '--metadata' or '-m' in your query. | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| extract_metadata | e | This option allows you to save metadata of all the downloaded images in a text file. | +| extract_metadata | e | This option allows you to save metadata of all the downloaded images in a JSON file. | | | | | | | | This file can be found in the ``logs/`` directory. 
The name of the file would be same as the keyword nam | | | | | @@ -278,7 +278,7 @@ Arguments +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | chromedriver | cd | With this argument you can pass the path to the 'chromedriver'. | | | | | -| | | The path looks like this: "path/to/chromedriver". In windows it will be "path/to/chromedriver.exe" | +| | | The path looks like this: "path/to/chromedriver". In windows it will be "C:\\path\\to\\chromedriver.exe" | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | safe_search | sa | Searches for images with the Safe Search filter On | | | | | @@ -454,13 +454,13 @@ Examples Troubleshooting =============== -**## SSL Errors** +**#~~~# SSL Errors** If you do see SSL errors on Mac for Python 3, please go to Finder —> Applications —> Python 3 —> Click on the ‘Install Certificates.command’ and run the file. -**## googleimagesdownload: command not found** +**#~~~# googleimagesdownload: command not found** While using the above commands, if you get ``Error: -bash: googleimagesdownload: command not found`` then you have to set the correct path variable. @@ -485,12 +485,12 @@ together they make: ``/Library/Frameworks/Python.framework/Versions/2.7/bin`` wh $ export PATH="/Library/Frameworks/Python.framework/Versions/2.7/bin" -**## [Errno 13] Permission denied creating directory 'downloads'** +**#~~~# [Errno 13] Permission denied creating directory 'downloads'** When you run the command, it downloads the images in the current directory (the directory from where you are running the command). If you get permission denied error for creating the `downloads directory`, then move to a directory in which you have the write permission and then run the command again. -**## Permission denied while installing the library** +**#~~~# Permission denied while installing the library** On MAC and Linux, when you get permission denied when installing the library using pip, try doing a user install. @@ -501,11 +501,11 @@ On MAC and Linux, when you get permission denied when installing the library usi You can also run pip install as a superuser with ``sudo pip install google_images_download`` but it is not generally a good idea because it can cause issues with your system-level packages. -**## Installing the chromedriver (with Selenium)** +**#~~~# Installing the chromedriver (with Selenium)** -If you would want to download more than 100 images per keyword, then you will need to install 'selenium' along with 'chromedriver'. +If you would want to download more than 100 images per keyword, then you will need to install 'selenium' library along with 'chromedriver' extension. -If you have pip installed the library or run the setup.py file, Selenium would have automatically installed on your machine. You will also need Chrome browser on your machine. For chromedriver: +If you have pip-installed the library or had run the setup.py file, Selenium would have automatically installed on your machine. You will also need Chrome browser on your machine. For chromedriver: `Download the correct chromedriver `__ based on your operating system. 
@@ -521,6 +521,8 @@ or `Ubuntu Guide Date: Sat, 18 Aug 2018 16:42:54 -0700 Subject: [PATCH 52/83] doc changes required as per #110, #120, #130, #132, #73 --- README.rst | 10 +++++----- setup.py | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/README.rst b/README.rst index 42a2bb3b..49055b96 100644 --- a/README.rst +++ b/README.rst @@ -15,8 +15,8 @@ and optionally download images to your computer. You can also invoke this script another python file. This is a small and ready-to-run program. No dependencies are required to be installed -if you would only want to download up to 100 images per keyword. If you would want more than 100 -images per keyword, then you would need to install ``Selenium`` library along with ``chromedriver``. +if you would only want to download up to 100 images per keyword. If you would want **more than 100 +images** per keyword, then you would need to install ``Selenium`` library along with ``chromedriver``. Detailed instructions in the troubleshooting section. @@ -451,8 +451,8 @@ Examples -------------- -Troubleshooting -=============== +Troubleshooting Errors/Issues +============================= **#~~~# SSL Errors** @@ -526,7 +526,7 @@ If on any rare occasion the chromedriver does not work for you, try downgrading Structure ========= -Below diagram represents the code logic. +Below diagram represents the algorithm logic to download images. .. figure:: http://www.zseries.in/flow-chart.png :alt: diff --git a/setup.py b/setup.py index 39323e3b..387fc3de 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from codecs import open from os import path -__version__ = '2.4.1' +__version__ = '2.4.2' here = path.abspath(path.dirname(__file__)) From 1c08f69e5a81a792c78878fbbfb58272cbd8ba93 Mon Sep 17 00:00:00 2001 From: hellogan <44324739+hellogan@users.noreply.github.com> Date: Sun, 4 Nov 2018 01:13:29 -0400 Subject: [PATCH 53/83] Added ability to view image URLs without downloading (#169) * Added argument to make downloading optional * Updated readme to reflect change --- README.rst | 5 ++++ .../google_images_download.py | 24 ++++++++++++------- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/README.rst b/README.rst index 49055b96..dff2690f 100644 --- a/README.rst +++ b/README.rst @@ -300,6 +300,11 @@ Arguments +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | help | h | show the help message regarding the usage of the above arguments | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| no_download | nd | Print the URLs of the images on the console without downloading them. These image URLs can be used for debugging purposes | +| | | | +| | | This argument does not take any value. Just add '--no-download' or '-nd' in your query. | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ + **Note:** If ``single_image`` or ``url`` parameter is not present, then keywords is a mandatory parameter. No other parameters are mandatory. 
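A minimal illustrative sketch (not part of the patch itself) of how the new ``no_download`` flag might be driven from Python, following the argument-dict pattern used by the test added earlier in this series; the keyword and limit values are placeholders only::

    from google_images_download import google_images_download

    # Illustrative arguments: print image URLs for a query without saving any files.
    arguments = {
        "keywords": "Polar bears",   # placeholder keyword
        "limit": 5,                  # placeholder limit
        "no_download": True,         # new flag from this patch: print URLs, skip downloading
        "print_urls": True,
    }

    response = google_images_download.googleimagesdownload()
    # download() still returns the dict mapping each search term to its list of paths;
    # with no_download set, no image files are written to disk.
    paths = response.download(arguments)
    print(paths)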
diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py index 5031eee0..912c4ac7 100755 --- a/google_images_download/google_images_download.py +++ b/google_images_download/google_images_download.py @@ -40,7 +40,7 @@ "output_directory", "image_directory", "no_directory", "proxy", "similar_images", "specific_site", "print_urls", "print_size", "print_paths", "metadata", "extract_metadata", "socket_timeout", "thumbnail", "language", "prefix", "chromedriver", "related_images", "safe_search", "no_numbering", - "offset"] + "offset", "no_download"] def user_input(): @@ -110,6 +110,7 @@ def user_input(): parser.add_argument('-sa', '--safe_search', default=False, help="Turns on the safe search filter while searching for images", action="store_true") parser.add_argument('-nn', '--no_numbering', default=False, help="Allows you to exclude the default numbering of images", action="store_true") parser.add_argument('-of', '--offset', help="Where to start in the fetched links", type=str, required=False) + parser.add_argument('-nd', '--no_download', default=False, help="Prints the URLs of the images and/or thumbnails without downloading them", action="store_true") args = parser.parse_args() arguments = vars(args) @@ -492,9 +493,11 @@ def create_directories(self,main_directory, dir_name,thumbnail): # Download Images - def download_image_thumbnail(self,image_url,main_directory,dir_name,return_image_name,print_urls,socket_timeout,print_size): - if print_urls: + def download_image_thumbnail(self,image_url,main_directory,dir_name,return_image_name,print_urls,socket_timeout,print_size,no_download): + if print_urls or no_download: print("Image URL: " + image_url) + if no_download: + return "success","Printed url without downloading" try: req = Request(image_url, headers={ "User-Agent": "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17"}) @@ -552,9 +555,11 @@ def download_image_thumbnail(self,image_url,main_directory,dir_name,return_image # Download Images - def download_image(self,image_url,image_format,main_directory,dir_name,count,print_urls,socket_timeout,prefix,print_size,no_numbering): - if print_urls: + def download_image(self,image_url,image_format,main_directory,dir_name,count,print_urls,socket_timeout,prefix,print_size,no_numbering,no_download): + if print_urls or no_download: print("Image URL: " + image_url) + if no_download: + return "success","Printed url without downloading",None,None try: req = Request(image_url, headers={ "User-Agent": "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17"}) @@ -709,13 +714,13 @@ def _get_all_items(self,page,main_directory,dir_name,limit,arguments): print("\nImage Metadata: " + str(object)) #download the images - download_status,download_message,return_image_name,absolute_path = self.download_image(object['image_link'],object['image_format'],main_directory,dir_name,count,arguments['print_urls'],arguments['socket_timeout'],arguments['prefix'],arguments['print_size'],arguments['no_numbering']) + download_status,download_message,return_image_name,absolute_path = self.download_image(object['image_link'],object['image_format'],main_directory,dir_name,count,arguments['print_urls'],arguments['socket_timeout'],arguments['prefix'],arguments['print_size'],arguments['no_numbering'],arguments['no_download']) print(download_message) if download_status == "success": # download image_thumbnails if arguments['thumbnail']: - 
download_status, download_message_thumbnail = self.download_image_thumbnail(object['image_thumbnail_url'],main_directory,dir_name,return_image_name,arguments['print_urls'],arguments['socket_timeout'],arguments['print_size']) + download_status, download_message_thumbnail = self.download_image_thumbnail(object['image_thumbnail_url'],main_directory,dir_name,return_image_name,arguments['print_urls'],arguments['socket_timeout'],arguments['print_size'],arguments['no_download']) print(download_message_thumbnail) count += 1 @@ -844,7 +849,10 @@ def download(self,arguments): else: raw_html = self.download_extended_page(url,arguments['chromedriver']) - print("Starting Download...") + if arguments['no_download']: + print("Starting to Print Image URLS") + else: + print("Starting Download...") items,errorCount,abs_path = self._get_all_items(raw_html,main_directory,dir_name,limit,arguments) #get all image items and download images paths[pky + search_keyword[i] + sky] = abs_path From 26a324009d8469efa3301a89c4bb747ca9b2ac89 Mon Sep 17 00:00:00 2001 From: Vasa Date: Sat, 3 Nov 2018 22:16:17 -0700 Subject: [PATCH 54/83] follow up commit from #169 --- README.rst | 8 ++++---- setup.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/README.rst b/README.rst index dff2690f..5bab145b 100644 --- a/README.rst +++ b/README.rst @@ -298,11 +298,11 @@ Arguments | | | | | | | This argument takes integer. Make sure the value of this argument is less than the value of limit | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| help | h | show the help message regarding the usage of the above arguments | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| no_download | nd | Print the URLs of the images on the console without downloading them. These image URLs can be used for debugging purposes | +| no_download | nd | Print the URLs of the images on the console without downloading them. These image URLs can be used for debugging purposes | | | | | -| | | This argument does not take any value. Just add '--no-download' or '-nd' in your query. | +| | | This argument does not take any value. Just add '--no-download' or '-nd' in your query. | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| help | h | show the help message regarding the usage of the above arguments | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ diff --git a/setup.py b/setup.py index 387fc3de..503f6d68 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from codecs import open from os import path -__version__ = '2.4.2' +__version__ = '2.4.3' here = path.abspath(path.dirname(__file__)) From 10a782c35e83408ed99ab0677dddcf354a87ceb7 Mon Sep 17 00:00:00 2001 From: hellogan <44324739+hellogan@users.noreply.github.com> Date: Sun, 4 Nov 2018 01:36:17 -0400 Subject: [PATCH 55/83] Changed Error Message (#168) Improving the error reporting if raw page could not be downloaded. 
--- google_images_download/google_images_download.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py index 912c4ac7..1da63f6b 100755 --- a/google_images_download/google_images_download.py +++ b/google_images_download/google_images_download.py @@ -136,7 +136,7 @@ def download_page(self,url): respData = str(resp.read()) return respData except Exception as e: - print(str(e)) + print("Could not open URL. Please check your internet connection and/or ssl settings") else: # If the Current Version of Python is 2.x try: headers = {} @@ -150,6 +150,7 @@ def download_page(self,url): page = response.read() return page except: + print("Could not open URL. Please check your internet connection and/or ssl settings") return "Page Not found" From e270624ed3111d45f5c6341581e7a7f56045c42e Mon Sep 17 00:00:00 2001 From: LFly <820364030@qq.com> Date: Sun, 4 Nov 2018 14:48:20 +0800 Subject: [PATCH 56/83] Update google_images_download.py (#156) replace or by function any and map, make code more simple and less (".jpg", ".gif", ".png", ".bmp", ".svg", ".webp", ".ico") the list well defined before use --- google_images_download/google_images_download.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py index 1da63f6b..433dc3bb 100755 --- a/google_images_download/google_images_download.py +++ b/google_images_download/google_images_download.py @@ -268,6 +268,7 @@ def format_object(self,object): #function to download single image def single_image(self,image_url): main_directory = "downloads" + extensions = (".jpg", ".gif", ".png", ".bmp", ".svg", ".webp", ".ico") url = image_url try: os.makedirs(main_directory) @@ -285,7 +286,8 @@ def single_image(self,image_url): image_name = str(url[(url.rfind('/')) + 1:]) if '?' in image_name: image_name = image_name[:image_name.find('?')] - if ".jpg" in image_name or ".gif" in image_name or ".png" in image_name or ".bmp" in image_name or ".svg" in image_name or ".webp" in image_name or ".ico" in image_name: + # if ".jpg" in image_name or ".gif" in image_name or ".png" in image_name or ".bmp" in image_name or ".svg" in image_name or ".webp" in image_name or ".ico" in image_name: + if any(map(lambda extension: extension in image_name, extensions)): file_name = main_directory + "/" + image_name else: file_name = main_directory + "/" + image_name + ".jpg" From aa4b5679a59fa56ee299c1d5b75407b5fc60019e Mon Sep 17 00:00:00 2001 From: Field Rain Date: Sun, 4 Nov 2018 15:00:14 +0800 Subject: [PATCH 57/83] Update get_next_tab(self,s) (#154) It seems that Google has updated its html format. 'class="ZO5Spb"' has been deprecated. --- google_images_download/google_images_download.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py index 433dc3bb..8c9fe8af 100755 --- a/google_images_download/google_images_download.py +++ b/google_images_download/google_images_download.py @@ -216,19 +216,19 @@ def repair(self,brokenjson): # Finding 'Next Image' from the given raw page def get_next_tab(self,s): - start_line = s.find('class="ZO5Spb"') + start_line = s.find('class="dtviD"') if start_line == -1: # If no links are found then give an error! 
end_quote = 0 link = "no_tabs" return link,'',end_quote else: - start_line = s.find('class="ZO5Spb"') + start_line = s.find('class="dtviD"') start_content = s.find('href="', start_line + 1) end_content = s.find('">', start_content + 1) url_item = "https://www.google.com" + str(s[start_content+6:end_content]) url_item = url_item.replace('&', '&') - start_line_2 = s.find('class="ZO5Spb"') + start_line_2 = s.find('class="dtviD"') start_content_2 = s.find(':', start_line_2 + 1) end_content_2 = s.find('"', start_content_2 + 1) url_item_name = str(s[start_content_2 + 1:end_content_2]) From 12f8b37335b1dcad71edb4f47ebdca13c4c9d586 Mon Sep 17 00:00:00 2001 From: Cedric-Garcia Date: Sun, 4 Nov 2018 02:13:49 -0500 Subject: [PATCH 58/83] Repair time_range download feature (#152) Closes #105 --- google_images_download/google_images_download.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py index 8c9fe8af..0b7c3a87 100755 --- a/google_images_download/google_images_download.py +++ b/google_images_download/google_images_download.py @@ -368,7 +368,7 @@ def build_url_parameters(self,arguments): if arguments['time_range']: json_acceptable_string = arguments['time_range'].replace("'", "\"") d = json.loads(json_acceptable_string) - time_range = ',cdr:1,cd_min:' + d['time_min'] + ',cd_max:' + d['time_min'] + time_range = ',cdr:1,cd_min:' + d['time_min'] + ',cd_max:' + d['time_max'] else: time_range = '' From 157a1339a7ff4fc49088c07119906eb077589c6e Mon Sep 17 00:00:00 2001 From: git commit suicide Date: Sun, 4 Nov 2018 09:33:11 +0200 Subject: [PATCH 59/83] Fix clipart parameter name (Issue #171) (#172) --- google_images_download/google_images_download.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py index 0b7c3a87..9a9fcf3d 100755 --- a/google_images_download/google_images_download.py +++ b/google_images_download/google_images_download.py @@ -86,7 +86,7 @@ def user_input(): choices=['large','medium','icon','>400*300','>640*480','>800*600','>1024*768','>2MP','>4MP','>6MP','>8MP','>10MP','>12MP','>15MP','>20MP','>40MP','>70MP']) parser.add_argument('-es', '--exact_size', help='exact image resolution "WIDTH,HEIGHT"', type=str, required=False) parser.add_argument('-t', '--type', help='image type', type=str, required=False, - choices=['face','photo','clip-art','line-drawing','animated']) + choices=['face','photo','clipart','line-drawing','animated']) parser.add_argument('-w', '--time', help='image age', type=str, required=False, choices=['past-24-hours','past-7-days']) parser.add_argument('-wr', '--time_range', help='time range for the age of the image. 
should be in the format {"time_min":"MM/DD/YYYY","time_max":"MM/DD/YYYY"}', type=str, required=False) @@ -384,7 +384,7 @@ def build_url_parameters(self,arguments): 'color_type':[arguments['color_type'],{'full-color':'ic:color', 'black-and-white':'ic:gray','transparent':'ic:trans'}], 'usage_rights':[arguments['usage_rights'],{'labeled-for-reuse-with-modifications':'sur:fmc','labeled-for-reuse':'sur:fc','labeled-for-noncommercial-reuse-with-modification':'sur:fm','labeled-for-nocommercial-reuse':'sur:f'}], 'size':[arguments['size'],{'large':'isz:l','medium':'isz:m','icon':'isz:i','>400*300':'isz:lt,islt:qsvga','>640*480':'isz:lt,islt:vga','>800*600':'isz:lt,islt:svga','>1024*768':'visz:lt,islt:xga','>2MP':'isz:lt,islt:2mp','>4MP':'isz:lt,islt:4mp','>6MP':'isz:lt,islt:6mp','>8MP':'isz:lt,islt:8mp','>10MP':'isz:lt,islt:10mp','>12MP':'isz:lt,islt:12mp','>15MP':'isz:lt,islt:15mp','>20MP':'isz:lt,islt:20mp','>40MP':'isz:lt,islt:40mp','>70MP':'isz:lt,islt:70mp'}], - 'type':[arguments['type'],{'face':'itp:face','photo':'itp:photo','clip-art':'itp:clip-art','line-drawing':'itp:lineart','animated':'itp:animated'}], + 'type':[arguments['type'],{'face':'itp:face','photo':'itp:photo','clipart':'itp:clipart','line-drawing':'itp:lineart','animated':'itp:animated'}], 'time':[arguments['time'],{'past-24-hours':'qdr:d','past-7-days':'qdr:w'}], 'aspect_ratio':[arguments['aspect_ratio'],{'tall':'iar:t','square':'iar:s','wide':'iar:w','panoramic':'iar:xw'}], 'format':[arguments['format'],{'jpg':'ift:jpg','gif':'ift:gif','png':'ift:png','bmp':'ift:bmp','svg':'ift:svg','webp':'webp','ico':'ift:ico'}]} From 043c327522e37611e96c48dcfe3321838b21e7e8 Mon Sep 17 00:00:00 2001 From: Vasa Date: Sun, 4 Nov 2018 01:55:35 -0700 Subject: [PATCH 60/83] new minor version 2.5.0 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 503f6d68..c47bcad5 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from codecs import open from os import path -__version__ = '2.4.3' +__version__ = '2.5.0' here = path.abspath(path.dirname(__file__)) From 7624dc877b054a32c9b3aa658549d1b9be53915c Mon Sep 17 00:00:00 2001 From: Rodolfo Rodriguez Date: Sun, 31 Mar 2019 14:03:54 -0500 Subject: [PATCH 61/83] Update license year (#210) --- Licence.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Licence.txt b/Licence.txt index 3361c0b2..2aee4738 100644 --- a/Licence.txt +++ b/Licence.txt @@ -1,6 +1,6 @@ The MIT License (MIT) -Copyright (c) 2015-2018 Hardik Vasa +Copyright (c) 2015-2019 Hardik Vasa Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal From e9189d4d5ff755cc1a9fbb29c3e6c6b459551a6a Mon Sep 17 00:00:00 2001 From: Lavinia Lee Date: Tue, 2 Apr 2019 00:13:43 -0400 Subject: [PATCH 62/83] just removes a line that makes filenames lowercase (#175) --- google_images_download/google_images_download.py | 1 - 1 file changed, 1 deletion(-) diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py index 9a9fcf3d..11ce2b2b 100755 --- a/google_images_download/google_images_download.py +++ b/google_images_download/google_images_download.py @@ -579,7 +579,6 @@ def download_image(self,image_url,image_format,main_directory,dir_name,count,pri # keep everything after the last '/' image_name = str(image_url[(image_url.rfind('/')) + 1:]) - image_name = image_name.lower() # if no extension then add it # remove everything after the image 
name if image_format == "": From a348dc4ee9aa77b615a00ac9fd5b60e99a4a6d5a Mon Sep 17 00:00:00 2001 From: piotrex Date: Tue, 23 Apr 2019 05:20:13 +0200 Subject: [PATCH 63/83] Update google_images_download.py (#114) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit letter 'ś' and BadStatusLine error --- .../google_images_download.py | 20 ++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py index 11ce2b2b..a8e56ecc 100755 --- a/google_images_download/google_images_download.py +++ b/google_images_download/google_images_download.py @@ -14,7 +14,7 @@ from urllib.request import URLError, HTTPError from urllib.parse import quote import http.client - from http.client import IncompleteRead + from http.client import IncompleteRead, BadStatusLine http.client._MAXHEADERS = 1000 else: # If the Current Version of Python is 2.x import urllib2 @@ -22,7 +22,7 @@ from urllib2 import URLError, HTTPError from urllib import quote import httplib - from httplib import IncompleteRead + from httplib import IncompleteRead, BadStatusLine httplib._MAXHEADERS = 1000 import time # Importing the time library to check the time of code execution import os @@ -416,10 +416,10 @@ def build_search_url(self,search_term,params,url,similar_images,specific_site,sa url = 'https://www.google.com/search?q=' + keywordem + '&espv=2&biw=1366&bih=667&site=webhp&source=lnms&tbm=isch&sa=X&ei=XosDVaCXD8TasATItgE&ved=0CAcQ_AUoAg' elif specific_site: url = 'https://www.google.com/search?q=' + quote( - search_term) + '&as_sitesearch=' + specific_site + '&espv=2&biw=1366&bih=667&site=webhp&source=lnms&tbm=isch' + params + '&sa=X&ei=XosDVaCXD8TasATItgE&ved=0CAcQ_AUoAg' + search_term.encode('utf-8')) + '&as_sitesearch=' + specific_site + '&espv=2&biw=1366&bih=667&site=webhp&source=lnms&tbm=isch' + params + '&sa=X&ei=XosDVaCXD8TasATItgE&ved=0CAcQ_AUoAg' else: url = 'https://www.google.com/search?q=' + quote( - search_term) + '&espv=2&biw=1366&bih=667&site=webhp&source=lnms&tbm=isch' + params + '&sa=X&ei=XosDVaCXD8TasATItgE&ved=0CAcQ_AUoAg' + search_term.encode('utf-8')) + '&espv=2&biw=1366&bih=667&site=webhp&source=lnms&tbm=isch' + params + '&sa=X&ei=XosDVaCXD8TasATItgE&ved=0CAcQ_AUoAg' #safe search check if safe_search: @@ -470,7 +470,7 @@ def create_directories(self,main_directory, dir_name,thumbnail): if not os.path.exists(main_directory): os.makedirs(main_directory) time.sleep(0.2) - path = str(dir_name) + path = (dir_name) sub_directory = os.path.join(main_directory, path) if not os.path.exists(sub_directory): os.makedirs(sub_directory) @@ -479,7 +479,7 @@ def create_directories(self,main_directory, dir_name,thumbnail): if not os.path.exists(sub_directory_thumbnail): os.makedirs(sub_directory_thumbnail) else: - path = str(dir_name) + path = (dir_name) sub_directory = os.path.join(main_directory, path) if not os.path.exists(sub_directory): os.makedirs(sub_directory) @@ -630,6 +630,12 @@ def download_image(self,image_url,image_format,main_directory,dir_name,count,pri download_message = "URLError on an image...trying next one..." + " Error: " + str(e) return_image_name = '' absolute_path = '' + + except BadStatusLine as e: + download_status = 'fail' + download_message = "BadStatusLine on an image...trying next one..." 
+ " Error: " + str(e) + return_image_name = '' + absolute_path = '' except HTTPError as e: # If there is any HTTPError download_status = 'fail' @@ -828,7 +834,7 @@ def download(self,arguments): for sky in suffix_keywords: # 1.for every suffix keywords i = 0 while i < len(search_keyword): # 2.for every main keyword - iteration = "\n" + "Item no.: " + str(i + 1) + " -->" + " Item name = " + str(pky) + str(search_keyword[i] + str(sky)) + iteration = "\n" + "Item no.: " + str(i + 1) + " -->" + " Item name = " + (pky) + (search_keyword[i]) + (sky) print(iteration) print("Evaluating...") search_term = pky + search_keyword[i] + sky From 65dc17d3c62cefeb760904388230b36177bc7362 Mon Sep 17 00:00:00 2001 From: Jithin James Date: Mon, 29 Apr 2019 22:15:22 +0530 Subject: [PATCH 64/83] Fix Arguments List (Alphabetic Order) (#176) --- README.rst | 264 ++++++++++++++++++++++++++--------------------------- 1 file changed, 132 insertions(+), 132 deletions(-) diff --git a/README.rst b/README.rst index 5bab145b..96b95d5d 100644 --- a/README.rst +++ b/README.rst @@ -86,6 +86,22 @@ Arguments +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | Argument | Short hand | Description | +===================+=============+===============================================================================================================================+ +| aspect_ratio | a | Denotes the aspect ratio of images to download. | +| | | | +| | | `Possible values: tall, square, wide, panoramic` | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| chromedriver | cd | With this argument you can pass the path to the 'chromedriver'. | +| | | | +| | | The path looks like this: "path/to/chromedriver". In windows it will be "C:\\path\\to\\chromedriver.exe" | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| color | co | Denotes the color filter that you want to apply to the images. | +| | | | +| | | `Possible values: red, orange, yellow, green, teal, blue, purple, pink, white, gray, black, brown` | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| color_type | ct | Denotes the color type you want to apply to the images. | +| | | | +| | | `Possible values: full-color, black-and-white, transparent` | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | config_file | cf | You can pass the arguments inside a config file. This is an alternative to passing arguments on the command line directly. | | | | | | | | Please refer to the | @@ -95,6 +111,35 @@ Arguments | | | * Config file can only be in **JSON** format | | | | * Please refrain from passing invalid arguments from config file. Refer to the below arguments list | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| delay | d | Time to wait between downloading two images | +| | | | +| | | Time is to be specified in seconds. But you can have sub-second times by using decimal points. 
| ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| exact_size | es | You can specify the exact size/resolution of the images | +| | | | +| | | This value of this argument can be specified as ```` where the fist integer stands for width of the image | +| | | and the second integer stands for the height of the image. For example, ``-es 1024,786`` | +| | | | +| | | **Note**: You cannot specify both 'size' and 'exact_size' arguments in the same query. You can only give one of them. | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| extract_metadata | e | This option allows you to save metadata of all the downloaded images in a JSON file. | +| | | | +| | | This file can be found in the ``logs/`` directory. The name of the file would be same as the keyword nam | +| | | | +| | | This argument does not take any value. Just add '--extract_metadata' or '-e' in your query. | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| format | f | Denotes the format/extension of the image that you want to download. | +| | | | +| | | `Possible values: jpg, gif, png, bmp, svg, webp, ico` | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| help | h | show the help message regarding the usage of the above arguments | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| image_directory | i | This lets you specify a directory inside of the main directory (output_directory) in which the images will be saved | +| | | | +| | | If not specified, it will default to the name of the keyword. | +| | | | +| | | The directory structure would look like: ```` | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | keywords | k | Denotes the keywords/key phrases you want to search for. For more than one keywords, wrap it in single quotes. | | | | | | | | Tips: | @@ -110,19 +155,11 @@ Arguments | | | | | | | Only file types '.txt' or '.csv' are allowed. | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| prefix_keywords | pk | Denotes additional words added before main keyword while making the search query. | -| | | | -| | | The final search query would be: | -| | | | -| | | So, for example, if the keyword is 'car' and prefix_keyword is 'red,yellow,blue', it will search and download images for | -| | | 'red car', 'yellow car' and 'blue car' individually | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| suffix_keywords | sk | Denotes additional words added after main keyword while making the search query. | -| | | | -| | | The final search query would be: | +| language | la | Defines the language filter. 
The search results are automatically returned in that language | | | | | -| | | So, for example, if the keyword is 'car' and suffix_keyword is 'red,yellow,blue', it will search and download images for | -| | | 'car red', 'car yellow' and 'car blue' individually | +| | | `Possible Values: Arabic, Chinese (Simplified), Chinese (Traditional), Czech, Danish, Dutch, English, Estonian. Finnish, | +| | | French, German, Greek, Hebrew, Hungarian, Icelandic, Italian, Japanese, Korean, Latvianm, Lithuanian, Norwegian, Portuguese, | +| | | Polish, Romanian, Russian, Spanish, Swedish, Turkish` | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | limit | l | Denotes number of images that you want to download. | | | | | @@ -132,177 +169,140 @@ Arguments | | | | | | | **Note**: In case of occasional errors while downloading images, you could get less than 100 (if the limit is set to 100) | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| related_images | ri | This argument downloads a ton of images related to the keyword you provided. | -| | | | -| | | Google Images page returns list of related keywords to the keyword you have mentioned in the query. This tool downloads | -| | | images from each of those related keywords based on the limit you have mentioned in your query | -| | | | -| | | This argument does not take any value. Just add '--related_images' or '-ri' in your query. | +| metadata | m | Prints the metada of the image on the console. | | | | | -| | | **Note:** This argument can download hundreds or thousands of additional images so please use this carefully. | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| format | f | Denotes the format/extension of the image that you want to download. | +| | | This includes image size, origin, image attributes, description, image URL, etc. | | | | | -| | | `Possible values: jpg, gif, png, bmp, svg, webp, ico` | +| | | This argument does not take any value. Just add '--metadata' or '-m' in your query. | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| color | co | Denotes the color filter that you want to apply to the images. | +| no_directory | n | This option allows you download images directly in the main directory (output_directory) without an image_directory | | | | | -| | | `Possible values: red, orange, yellow, green, teal, blue, purple, pink, white, gray, black, brown` | +| | | The directory structure would look like: ```` | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| color_type | ct | Denotes the color type you want to apply to the images. | +| no_download | nd | Print the URLs of the images on the console without downloading them. These image URLs can be used for debugging purposes | | | | | -| | | `Possible values: full-color, black-and-white, transparent` | +| | | This argument does not take any value. Just add '--no-download' or '-nd' in your query. 
| +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| usage_rights | r | Denotes the usage rights/licence under which the image is classified. | -| | | | -| | | `Possible values:` | +| no_numbering | nn | When you specify this argument, the script does not add ordered numbering as prefix to the images it downloads | | | | | -| | | * `labeled-for-reuse-with-modifications`, | -| | | * `labeled-for-reuse`, | -| | | * `labeled-for-noncommercial-reuse-with-modification`, | -| | | * `labeled-for-nocommercial-reuse` | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| size | s | Denotes the relative size of the image to be downloaded. | +| | | If this argument is not specified, the images are numbered in order in which they are downloaded | | | | | -| | | `Possible values: large, medium, icon, >400*300, >640*480, >800*600, >1024*768, >2MP, >4MP, >6MP, >8MP, >10MP, | -| | | >12MP, >15MP, >20MP, >40MP, >70MP` | +| | | This argument does not take any value. Just add '--no_numbering' or '-nn' in your query. | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| exact_size | es | You can specify the exact size/resolution of the images | -| | | | -| | | This value of this argument can be specified as ```` where the fist integer stands for width of the image | -| | | and the second integer stands for the height of the image. For example, ``-es 1024,786`` | +| offset | of | When you specify this argument, it will skip the offset number of links before it starts downloading images | | | | | -| | | **Note**: You cannot specify both 'size' and 'exact_size' arguments in the same query. You can only give one of them. | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| aspect_ratio | a | Denotes the aspect ratio of images to download. | +| | | If this argument is not specified, the script will start downloading form the first link until the limit is reached | | | | | -| | | `Possible values: tall, square, wide, panoramic` | +| | | This argument takes integer. Make sure the value of this argument is less than the value of limit | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| type | t | Denotes the type of image to be downloaded. | +| output_directory | o | Allows you specify the main directory name in which the images are downloaded. | | | | | -| | | `Possible values: face, photo, clip-art, line-drawing, animated` | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| time | w | Denotes the time the image was uploaded/indexed. | +| | | If not specified, it will default to 'downloads' directory. 
This directory is located in the path from where you run this code| | | | | -| | | `Possible values: past-24-hours, past-7-days` | +| | | The directory structure would look like: ```` | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| time_range | wr | Denotes the time range for which you want to search the images | +| prefix | pr | A word that you would want to prefix in front of actual image name. | | | | | -| | | The value of this parameter should be in the following format '{"time_min":"MM/DD/YYYY","time_max":"MM/DD/YYYY"}' | +| | | This feature can be used to rename files for image identification purpose. | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| delay | d | Time to wait between downloading two images | +| prefix_keywords | pk | Denotes additional words added before main keyword while making the search query. | | | | | -| | | Time is to be specified in seconds. But you can have sub-second times by using decimal points. | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| url | u | Allows you search by image when you have the URL from the Google Images page. | -| | | It downloads images from the google images link provided | +| | | The final search query would be: | | | | | -| | | If you are searching an image on the browser google images page, simply grab the browser URL and paste it in this parameter | -| | | It will download all the images seen on that page. | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| single_image | x | Allows you to download one image if the complete (absolute) URL of the image is provided | +| | | So, for example, if the keyword is 'car' and prefix_keyword is 'red,yellow,blue', it will search and download images for | +| | | 'red car', 'yellow car' and 'blue car' individually | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| output_directory | o | Allows you specify the main directory name in which the images are downloaded. | +| print_paths | pp | Prints the list of all the absolute paths of the downloaded images | | | | | -| | | If not specified, it will default to 'downloads' directory. This directory is located in the path from where you run this code| +| | | When calling the script from another python file, this list will be saved in a variable (as shown in the example below) | | | | | -| | | The directory structure would look like: ```` | +| | | This argument also allows you to print the list on the console | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| image_directory | i | This lets you specify a directory inside of the main directory (output_directory) in which the images will be saved | +| print_size | ps | Prints the size of the images on the console | | | | | -| | | If not specified, it will default to the name of the keyword. 
| +| | | The size denoted the actual size of the image and not the size of the image on disk | | | | | -| | | The directory structure would look like: ```` | +| | | This argument does not take any value. Just add '--print_size' or '-ps' in your query. | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| no_directory | n | This option allows you download images directly in the main directory (output_directory) without an image_directory | +| print_urls | p | Print the URLs of the images on the console. These image URLs can be used for debugging purposes | | | | | -| | | The directory structure would look like: ```` | +| | | This argument does not take any value. Just add '--print_urls' or '-p' in your query. | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | proxy | px | Allows you to specify proxy server setting for all your requests | | | | | | | | You can specify the proxy settings in 'IP:Port' format | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| similar_images | si | Reverse Image Search or 'Search by Image' as it is referred to on Google. | -| | | | -| | | Searches and downloads images that are similar to the absolute image link/url you provide. | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| specific_site | ss | Allows you to download images with keywords only from a specific website/domain name you mention. | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| print_urls | p | Print the URLs of the images on the console. These image URLs can be used for debugging purposes | +| related_images | ri | This argument downloads a ton of images related to the keyword you provided. | | | | | -| | | This argument does not take any value. Just add '--print_urls' or '-p' in your query. | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| print_size | ps | Prints the size of the images on the console | +| | | Google Images page returns list of related keywords to the keyword you have mentioned in the query. This tool downloads | +| | | images from each of those related keywords based on the limit you have mentioned in your query | | | | | -| | | The size denoted the actual size of the image and not the size of the image on disk | +| | | This argument does not take any value. Just add '--related_images' or '-ri' in your query. | | | | | -| | | This argument does not take any value. Just add '--print_size' or '-ps' in your query. | +| | | **Note:** This argument can download hundreds or thousands of additional images so please use this carefully. 
| +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| print_paths | pp | Prints the list of all the absolute paths of the downloaded images | +| safe_search | sa | Searches for images with the Safe Search filter On | | | | | -| | | When calling the script from another python file, this list will be saved in a variable (as shown in the example below) | +| | | And this filter will be Off by default if you do not specify the safe_search argument | | | | | -| | | This argument also allows you to print the list on the console | +| | | This argument does not take any value. Just add '--safe_search' or '-sa' in your query. | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| metadata | m | Prints the metada of the image on the console. | -| | | | -| | | This includes image size, origin, image attributes, description, image URL, etc. | +| similar_images | si | Reverse Image Search or 'Search by Image' as it is referred to on Google. | | | | | -| | | This argument does not take any value. Just add '--metadata' or '-m' in your query. | +| | | Searches and downloads images that are similar to the absolute image link/url you provide. | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| extract_metadata | e | This option allows you to save metadata of all the downloaded images in a JSON file. | -| | | | -| | | This file can be found in the ``logs/`` directory. The name of the file would be same as the keyword nam | +| single_image | x | Allows you to download one image if the complete (absolute) URL of the image is provided | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| size | s | Denotes the relative size of the image to be downloaded. | | | | | -| | | This argument does not take any value. Just add '--extract_metadata' or '-e' in your query. | +| | | `Possible values: large, medium, icon, >400*300, >640*480, >800*600, >1024*768, >2MP, >4MP, >6MP, >8MP, >10MP, | +| | | >12MP, >15MP, >20MP, >40MP, >70MP` | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | socket_timeout | st | Allows you to specify the time to wait for socket connection. | | | | | | | | You could specify a higher timeout time for slow internet connection. The default value is 10 seconds. | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| specific_site | ss | Allows you to download images with keywords only from a specific website/domain name you mention. | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| suffix_keywords | sk | Denotes additional words added after main keyword while making the search query. 
| +| | | | +| | | The final search query would be: | +| | | | +| | | So, for example, if the keyword is 'car' and suffix_keyword is 'red,yellow,blue', it will search and download images for | +| | | 'car red', 'car yellow' and 'car blue' individually | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | thumbnail | th | Downloads image thumbnails corresponding to each image downloaded. | | | | | | | | Thumbnails are saved in their own sub-directories inside of the main directory. | | | | | | | | This argument does not take any value. Just add '--thumbnail' or '-th' in your query. | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| language | la | Defines the language filter. The search results are automatically returned in that language | -| | | | -| | | `Possible Values: Arabic, Chinese (Simplified), Chinese (Traditional), Czech, Danish, Dutch, English, Estonian. Finnish, | -| | | French, German, Greek, Hebrew, Hungarian, Icelandic, Italian, Japanese, Korean, Latvianm, Lithuanian, Norwegian, Portuguese, | -| | | Polish, Romanian, Russian, Spanish, Swedish, Turkish` | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| prefix | pr | A word that you would want to prefix in front of actual image name. | +| time | w | Denotes the time the image was uploaded/indexed. | | | | | -| | | This feature can be used to rename files for image identification purpose. | +| | | `Possible values: past-24-hours, past-7-days` | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| chromedriver | cd | With this argument you can pass the path to the 'chromedriver'. | +| time_range | wr | Denotes the time range for which you want to search the images | | | | | -| | | The path looks like this: "path/to/chromedriver". In windows it will be "C:\\path\\to\\chromedriver.exe" | +| | | The value of this parameter should be in the following format '{"time_min":"MM/DD/YYYY","time_max":"MM/DD/YYYY"}' | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| safe_search | sa | Searches for images with the Safe Search filter On | -| | | | -| | | And this filter will be Off by default if you do not specify the safe_search argument | +| type | t | Denotes the type of image to be downloaded. | | | | | -| | | This argument does not take any value. Just add '--safe_search' or '-sa' in your query. | +| | | `Possible values: face, photo, clip-art, line-drawing, animated` | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| no_numbering | nn | When you specify this argument, the script does not add ordered numbering as prefix to the images it downloads | -| | | | -| | | If this argument is not specified, the images are numbered in order in which they are downloaded | +| url | u | Allows you search by image when you have the URL from the Google Images page. 
| +| | | It downloads images from the google images link provided | | | | | -| | | This argument does not take any value. Just add '--no_numbering' or '-nn' in your query. | +| | | If you are searching an image on the browser google images page, simply grab the browser URL and paste it in this parameter | +| | | It will download all the images seen on that page. | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| offset | of | When you specify this argument, it will skip the offset number of links before it starts downloading images | -| | | | -| | | If this argument is not specified, the script will start downloading form the first link until the limit is reached | +| usage_rights | r | Denotes the usage rights/licence under which the image is classified. | | | | | -| | | This argument takes integer. Make sure the value of this argument is less than the value of limit | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| no_download | nd | Print the URLs of the images on the console without downloading them. These image URLs can be used for debugging purposes | +| | | `Possible values:` | | | | | -| | | This argument does not take any value. Just add '--no-download' or '-nd' in your query. | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| help | h | show the help message regarding the usage of the above arguments | +| | | * `labeled-for-reuse-with-modifications`, | +| | | * `labeled-for-reuse`, | +| | | * `labeled-for-noncommercial-reuse-with-modification`, | +| | | * `labeled-for-nocommercial-reuse` | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ @@ -394,61 +394,61 @@ Examples - To use non-English keywords for image search .. code-block:: bash - + $ googleimagesdownload -k "北极熊" -l 5 - To download images from the google images link .. code-block:: bash - + $ googleimagesdownload -k "sample" -u - To save images in specific main directory (instead of in 'downloads') .. code-block:: bash - + $ googleimagesdownload -k "boat" -o "boat_new" - To download one single image with the image URL .. code-block:: bash - + $ googleimagesdownload --keywords "baloons" --single_image - To download images with size and type constrains .. code-block:: bash - + $ googleimagesdownload --keywords "baloons" --size medium --type animated - To download images with specific usage rights .. code-block:: bash - + $ googleimagesdownload --keywords "universe" --usage_rights labeled-for-reuse - To download images with specific color type .. code-block:: bash - + $ googleimagesdownload --keywords "flowers" --color_type black-and-white - To download images with specific aspect ratio .. code-block:: bash - + $ googleimagesdownload --keywords "universe" --aspect_ratio panoramic - To download images which are similar to the image in the image URL that you provided (Reverse Image search). .. code-block:: bash - + $ googleimagesdownload -si -l 10 - To download images from specific website or domain name for a given keyword .. 
code-block:: bash - + $ googleimagesdownload --keywords "universe" --specific_site example.com ===> The images would be downloaded in their own sub-directories inside the main directory @@ -473,7 +473,7 @@ To get the details of the repo, run the following command: .. code-block:: bash - $ pip show -f google_images_download + $ pip show -f google_images_download you will get the result like this: @@ -486,7 +486,7 @@ you will get the result like this: together they make: ``/Library/Frameworks/Python.framework/Versions/2.7/bin`` which you need add it to the path: .. code-block:: bash - + $ export PATH="/Library/Frameworks/Python.framework/Versions/2.7/bin" @@ -500,7 +500,7 @@ When you run the command, it downloads the images in the current directory (the On MAC and Linux, when you get permission denied when installing the library using pip, try doing a user install. .. code-block:: bash - + $ pip install google_images_download --user You can also run pip install as a superuser with ``sudo pip install google_images_download`` but it is not generally a good idea because it can cause issues with your system-level packages. From 5db464986b272478839828424a72f1969518b523 Mon Sep 17 00:00:00 2001 From: Hardik Vasa Date: Mon, 29 Apr 2019 09:55:19 -0700 Subject: [PATCH 65/83] Revert "Fix Arguments List (Alphabetic Order) (#176)" (#228) This reverts commit 65dc17d3c62cefeb760904388230b36177bc7362. --- README.rst | 264 ++++++++++++++++++++++++++--------------------------- 1 file changed, 132 insertions(+), 132 deletions(-) diff --git a/README.rst b/README.rst index 96b95d5d..5bab145b 100644 --- a/README.rst +++ b/README.rst @@ -86,22 +86,6 @@ Arguments +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | Argument | Short hand | Description | +===================+=============+===============================================================================================================================+ -| aspect_ratio | a | Denotes the aspect ratio of images to download. | -| | | | -| | | `Possible values: tall, square, wide, panoramic` | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| chromedriver | cd | With this argument you can pass the path to the 'chromedriver'. | -| | | | -| | | The path looks like this: "path/to/chromedriver". In windows it will be "C:\\path\\to\\chromedriver.exe" | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| color | co | Denotes the color filter that you want to apply to the images. | -| | | | -| | | `Possible values: red, orange, yellow, green, teal, blue, purple, pink, white, gray, black, brown` | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| color_type | ct | Denotes the color type you want to apply to the images. | -| | | | -| | | `Possible values: full-color, black-and-white, transparent` | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | config_file | cf | You can pass the arguments inside a config file. This is an alternative to passing arguments on the command line directly. 
| | | | | | | | Please refer to the | @@ -111,35 +95,6 @@ Arguments | | | * Config file can only be in **JSON** format | | | | * Please refrain from passing invalid arguments from config file. Refer to the below arguments list | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| delay | d | Time to wait between downloading two images | -| | | | -| | | Time is to be specified in seconds. But you can have sub-second times by using decimal points. | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| exact_size | es | You can specify the exact size/resolution of the images | -| | | | -| | | This value of this argument can be specified as ```` where the fist integer stands for width of the image | -| | | and the second integer stands for the height of the image. For example, ``-es 1024,786`` | -| | | | -| | | **Note**: You cannot specify both 'size' and 'exact_size' arguments in the same query. You can only give one of them. | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| extract_metadata | e | This option allows you to save metadata of all the downloaded images in a JSON file. | -| | | | -| | | This file can be found in the ``logs/`` directory. The name of the file would be same as the keyword nam | -| | | | -| | | This argument does not take any value. Just add '--extract_metadata' or '-e' in your query. | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| format | f | Denotes the format/extension of the image that you want to download. | -| | | | -| | | `Possible values: jpg, gif, png, bmp, svg, webp, ico` | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| help | h | show the help message regarding the usage of the above arguments | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| image_directory | i | This lets you specify a directory inside of the main directory (output_directory) in which the images will be saved | -| | | | -| | | If not specified, it will default to the name of the keyword. | -| | | | -| | | The directory structure would look like: ```` | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | keywords | k | Denotes the keywords/key phrases you want to search for. For more than one keywords, wrap it in single quotes. | | | | | | | | Tips: | @@ -155,11 +110,19 @@ Arguments | | | | | | | Only file types '.txt' or '.csv' are allowed. | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| language | la | Defines the language filter. The search results are automatically returned in that language | +| prefix_keywords | pk | Denotes additional words added before main keyword while making the search query. 
| | | | | -| | | `Possible Values: Arabic, Chinese (Simplified), Chinese (Traditional), Czech, Danish, Dutch, English, Estonian. Finnish, | -| | | French, German, Greek, Hebrew, Hungarian, Icelandic, Italian, Japanese, Korean, Latvianm, Lithuanian, Norwegian, Portuguese, | -| | | Polish, Romanian, Russian, Spanish, Swedish, Turkish` | +| | | The final search query would be: | +| | | | +| | | So, for example, if the keyword is 'car' and prefix_keyword is 'red,yellow,blue', it will search and download images for | +| | | 'red car', 'yellow car' and 'blue car' individually | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| suffix_keywords | sk | Denotes additional words added after main keyword while making the search query. | +| | | | +| | | The final search query would be: | +| | | | +| | | So, for example, if the keyword is 'car' and suffix_keyword is 'red,yellow,blue', it will search and download images for | +| | | 'car red', 'car yellow' and 'car blue' individually | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | limit | l | Denotes number of images that you want to download. | | | | | @@ -169,107 +132,133 @@ Arguments | | | | | | | **Note**: In case of occasional errors while downloading images, you could get less than 100 (if the limit is set to 100) | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| metadata | m | Prints the metada of the image on the console. | +| related_images | ri | This argument downloads a ton of images related to the keyword you provided. | | | | | -| | | This includes image size, origin, image attributes, description, image URL, etc. | +| | | Google Images page returns list of related keywords to the keyword you have mentioned in the query. This tool downloads | +| | | images from each of those related keywords based on the limit you have mentioned in your query | | | | | -| | | This argument does not take any value. Just add '--metadata' or '-m' in your query. | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| no_directory | n | This option allows you download images directly in the main directory (output_directory) without an image_directory | +| | | This argument does not take any value. Just add '--related_images' or '-ri' in your query. | | | | | -| | | The directory structure would look like: ```` | +| | | **Note:** This argument can download hundreds or thousands of additional images so please use this carefully. | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| no_download | nd | Print the URLs of the images on the console without downloading them. These image URLs can be used for debugging purposes | +| format | f | Denotes the format/extension of the image that you want to download. | | | | | -| | | This argument does not take any value. Just add '--no-download' or '-nd' in your query. 
| +| | | `Possible values: jpg, gif, png, bmp, svg, webp, ico` | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| no_numbering | nn | When you specify this argument, the script does not add ordered numbering as prefix to the images it downloads | -| | | | -| | | If this argument is not specified, the images are numbered in order in which they are downloaded | +| color | co | Denotes the color filter that you want to apply to the images. | | | | | -| | | This argument does not take any value. Just add '--no_numbering' or '-nn' in your query. | +| | | `Possible values: red, orange, yellow, green, teal, blue, purple, pink, white, gray, black, brown` | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| offset | of | When you specify this argument, it will skip the offset number of links before it starts downloading images | -| | | | -| | | If this argument is not specified, the script will start downloading form the first link until the limit is reached | +| color_type | ct | Denotes the color type you want to apply to the images. | | | | | -| | | This argument takes integer. Make sure the value of this argument is less than the value of limit | +| | | `Possible values: full-color, black-and-white, transparent` | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| output_directory | o | Allows you specify the main directory name in which the images are downloaded. | +| usage_rights | r | Denotes the usage rights/licence under which the image is classified. | | | | | -| | | If not specified, it will default to 'downloads' directory. This directory is located in the path from where you run this code| +| | | `Possible values:` | | | | | -| | | The directory structure would look like: ```` | +| | | * `labeled-for-reuse-with-modifications`, | +| | | * `labeled-for-reuse`, | +| | | * `labeled-for-noncommercial-reuse-with-modification`, | +| | | * `labeled-for-nocommercial-reuse` | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| prefix | pr | A word that you would want to prefix in front of actual image name. | +| size | s | Denotes the relative size of the image to be downloaded. | | | | | -| | | This feature can be used to rename files for image identification purpose. | +| | | `Possible values: large, medium, icon, >400*300, >640*480, >800*600, >1024*768, >2MP, >4MP, >6MP, >8MP, >10MP, | +| | | >12MP, >15MP, >20MP, >40MP, >70MP` | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| prefix_keywords | pk | Denotes additional words added before main keyword while making the search query. | +| exact_size | es | You can specify the exact size/resolution of the images | | | | | -| | | The final search query would be: | +| | | This value of this argument can be specified as ```` where the fist integer stands for width of the image | +| | | and the second integer stands for the height of the image. 
For example, ``-es 1024,786`` | | | | | -| | | So, for example, if the keyword is 'car' and prefix_keyword is 'red,yellow,blue', it will search and download images for | -| | | 'red car', 'yellow car' and 'blue car' individually | +| | | **Note**: You cannot specify both 'size' and 'exact_size' arguments in the same query. You can only give one of them. | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| print_paths | pp | Prints the list of all the absolute paths of the downloaded images | +| aspect_ratio | a | Denotes the aspect ratio of images to download. | | | | | -| | | When calling the script from another python file, this list will be saved in a variable (as shown in the example below) | +| | | `Possible values: tall, square, wide, panoramic` | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| type | t | Denotes the type of image to be downloaded. | | | | | -| | | This argument also allows you to print the list on the console | +| | | `Possible values: face, photo, clip-art, line-drawing, animated` | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| print_size | ps | Prints the size of the images on the console | +| time | w | Denotes the time the image was uploaded/indexed. | | | | | -| | | The size denoted the actual size of the image and not the size of the image on disk | +| | | `Possible values: past-24-hours, past-7-days` | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| time_range | wr | Denotes the time range for which you want to search the images | | | | | -| | | This argument does not take any value. Just add '--print_size' or '-ps' in your query. | +| | | The value of this parameter should be in the following format '{"time_min":"MM/DD/YYYY","time_max":"MM/DD/YYYY"}' | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| print_urls | p | Print the URLs of the images on the console. These image URLs can be used for debugging purposes | +| delay | d | Time to wait between downloading two images | | | | | -| | | This argument does not take any value. Just add '--print_urls' or '-p' in your query. | +| | | Time is to be specified in seconds. But you can have sub-second times by using decimal points. | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| proxy | px | Allows you to specify proxy server setting for all your requests | +| url | u | Allows you search by image when you have the URL from the Google Images page. | +| | | It downloads images from the google images link provided | | | | | -| | | You can specify the proxy settings in 'IP:Port' format | +| | | If you are searching an image on the browser google images page, simply grab the browser URL and paste it in this parameter | +| | | It will download all the images seen on that page. 
| +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| related_images | ri | This argument downloads a ton of images related to the keyword you provided. | +| single_image | x | Allows you to download one image if the complete (absolute) URL of the image is provided | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| output_directory | o | Allows you specify the main directory name in which the images are downloaded. | | | | | -| | | Google Images page returns list of related keywords to the keyword you have mentioned in the query. This tool downloads | -| | | images from each of those related keywords based on the limit you have mentioned in your query | +| | | If not specified, it will default to 'downloads' directory. This directory is located in the path from where you run this code| | | | | -| | | This argument does not take any value. Just add '--related_images' or '-ri' in your query. | +| | | The directory structure would look like: ```` | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| image_directory | i | This lets you specify a directory inside of the main directory (output_directory) in which the images will be saved | | | | | -| | | **Note:** This argument can download hundreds or thousands of additional images so please use this carefully. | +| | | If not specified, it will default to the name of the keyword. | +| | | | +| | | The directory structure would look like: ```` | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| safe_search | sa | Searches for images with the Safe Search filter On | +| no_directory | n | This option allows you download images directly in the main directory (output_directory) without an image_directory | | | | | -| | | And this filter will be Off by default if you do not specify the safe_search argument | +| | | The directory structure would look like: ```` | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| proxy | px | Allows you to specify proxy server setting for all your requests | | | | | -| | | This argument does not take any value. Just add '--safe_search' or '-sa' in your query. | +| | | You can specify the proxy settings in 'IP:Port' format | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | similar_images | si | Reverse Image Search or 'Search by Image' as it is referred to on Google. | | | | | | | | Searches and downloads images that are similar to the absolute image link/url you provide. | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| single_image | x | Allows you to download one image if the complete (absolute) URL of the image is provided | +| specific_site | ss | Allows you to download images with keywords only from a specific website/domain name you mention. 
| +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| size | s | Denotes the relative size of the image to be downloaded. | +| print_urls | p | Print the URLs of the images on the console. These image URLs can be used for debugging purposes | | | | | -| | | `Possible values: large, medium, icon, >400*300, >640*480, >800*600, >1024*768, >2MP, >4MP, >6MP, >8MP, >10MP, | -| | | >12MP, >15MP, >20MP, >40MP, >70MP` | +| | | This argument does not take any value. Just add '--print_urls' or '-p' in your query. | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| socket_timeout | st | Allows you to specify the time to wait for socket connection. | +| print_size | ps | Prints the size of the images on the console | | | | | -| | | You could specify a higher timeout time for slow internet connection. The default value is 10 seconds. | +| | | The size denoted the actual size of the image and not the size of the image on disk | +| | | | +| | | This argument does not take any value. Just add '--print_size' or '-ps' in your query. | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| specific_site | ss | Allows you to download images with keywords only from a specific website/domain name you mention. | +| print_paths | pp | Prints the list of all the absolute paths of the downloaded images | +| | | | +| | | When calling the script from another python file, this list will be saved in a variable (as shown in the example below) | +| | | | +| | | This argument also allows you to print the list on the console | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| suffix_keywords | sk | Denotes additional words added after main keyword while making the search query. | +| metadata | m | Prints the metada of the image on the console. | | | | | -| | | The final search query would be: | +| | | This includes image size, origin, image attributes, description, image URL, etc. | | | | | -| | | So, for example, if the keyword is 'car' and suffix_keyword is 'red,yellow,blue', it will search and download images for | -| | | 'car red', 'car yellow' and 'car blue' individually | +| | | This argument does not take any value. Just add '--metadata' or '-m' in your query. | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| extract_metadata | e | This option allows you to save metadata of all the downloaded images in a JSON file. | +| | | | +| | | This file can be found in the ``logs/`` directory. The name of the file would be same as the keyword nam | +| | | | +| | | This argument does not take any value. Just add '--extract_metadata' or '-e' in your query. | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| socket_timeout | st | Allows you to specify the time to wait for socket connection. | +| | | | +| | | You could specify a higher timeout time for slow internet connection. The default value is 10 seconds. 
| +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | thumbnail | th | Downloads image thumbnails corresponding to each image downloaded. | | | | | @@ -277,32 +266,43 @@ Arguments | | | | | | | This argument does not take any value. Just add '--thumbnail' or '-th' in your query. | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| time | w | Denotes the time the image was uploaded/indexed. | +| language | la | Defines the language filter. The search results are automatically returned in that language | | | | | -| | | `Possible values: past-24-hours, past-7-days` | +| | | `Possible Values: Arabic, Chinese (Simplified), Chinese (Traditional), Czech, Danish, Dutch, English, Estonian. Finnish, | +| | | French, German, Greek, Hebrew, Hungarian, Icelandic, Italian, Japanese, Korean, Latvianm, Lithuanian, Norwegian, Portuguese, | +| | | Polish, Romanian, Russian, Spanish, Swedish, Turkish` | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| time_range | wr | Denotes the time range for which you want to search the images | +| prefix | pr | A word that you would want to prefix in front of actual image name. | | | | | -| | | The value of this parameter should be in the following format '{"time_min":"MM/DD/YYYY","time_max":"MM/DD/YYYY"}' | +| | | This feature can be used to rename files for image identification purpose. | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| type | t | Denotes the type of image to be downloaded. | +| chromedriver | cd | With this argument you can pass the path to the 'chromedriver'. | | | | | -| | | `Possible values: face, photo, clip-art, line-drawing, animated` | +| | | The path looks like this: "path/to/chromedriver". In windows it will be "C:\\path\\to\\chromedriver.exe" | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| url | u | Allows you search by image when you have the URL from the Google Images page. | -| | | It downloads images from the google images link provided | +| safe_search | sa | Searches for images with the Safe Search filter On | | | | | -| | | If you are searching an image on the browser google images page, simply grab the browser URL and paste it in this parameter | -| | | It will download all the images seen on that page. | +| | | And this filter will be Off by default if you do not specify the safe_search argument | +| | | | +| | | This argument does not take any value. Just add '--safe_search' or '-sa' in your query. | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| usage_rights | r | Denotes the usage rights/licence under which the image is classified. 
| +| no_numbering | nn | When you specify this argument, the script does not add ordered numbering as prefix to the images it downloads | | | | | -| | | `Possible values:` | +| | | If this argument is not specified, the images are numbered in order in which they are downloaded | | | | | -| | | * `labeled-for-reuse-with-modifications`, | -| | | * `labeled-for-reuse`, | -| | | * `labeled-for-noncommercial-reuse-with-modification`, | -| | | * `labeled-for-nocommercial-reuse` | +| | | This argument does not take any value. Just add '--no_numbering' or '-nn' in your query. | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| offset | of | When you specify this argument, it will skip the offset number of links before it starts downloading images | +| | | | +| | | If this argument is not specified, the script will start downloading form the first link until the limit is reached | +| | | | +| | | This argument takes integer. Make sure the value of this argument is less than the value of limit | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| no_download | nd | Print the URLs of the images on the console without downloading them. These image URLs can be used for debugging purposes | +| | | | +| | | This argument does not take any value. Just add '--no-download' or '-nd' in your query. | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| help | h | show the help message regarding the usage of the above arguments | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ @@ -394,61 +394,61 @@ Examples - To use non-English keywords for image search .. code-block:: bash - + $ googleimagesdownload -k "北极熊" -l 5 - To download images from the google images link .. code-block:: bash - + $ googleimagesdownload -k "sample" -u - To save images in specific main directory (instead of in 'downloads') .. code-block:: bash - + $ googleimagesdownload -k "boat" -o "boat_new" - To download one single image with the image URL .. code-block:: bash - + $ googleimagesdownload --keywords "baloons" --single_image - To download images with size and type constrains .. code-block:: bash - + $ googleimagesdownload --keywords "baloons" --size medium --type animated - To download images with specific usage rights .. code-block:: bash - + $ googleimagesdownload --keywords "universe" --usage_rights labeled-for-reuse - To download images with specific color type .. code-block:: bash - + $ googleimagesdownload --keywords "flowers" --color_type black-and-white - To download images with specific aspect ratio .. code-block:: bash - + $ googleimagesdownload --keywords "universe" --aspect_ratio panoramic - To download images which are similar to the image in the image URL that you provided (Reverse Image search). .. code-block:: bash - + $ googleimagesdownload -si -l 10 - To download images from specific website or domain name for a given keyword .. 
code-block:: bash - + $ googleimagesdownload --keywords "universe" --specific_site example.com ===> The images would be downloaded in their own sub-directories inside the main directory @@ -473,7 +473,7 @@ To get the details of the repo, run the following command: .. code-block:: bash - $ pip show -f google_images_download + $ pip show -f google_images_download you will get the result like this: @@ -486,7 +486,7 @@ you will get the result like this: together they make: ``/Library/Frameworks/Python.framework/Versions/2.7/bin`` which you need add it to the path: .. code-block:: bash - + $ export PATH="/Library/Frameworks/Python.framework/Versions/2.7/bin" @@ -500,7 +500,7 @@ When you run the command, it downloads the images in the current directory (the On MAC and Linux, when you get permission denied when installing the library using pip, try doing a user install. .. code-block:: bash - + $ pip install google_images_download --user You can also run pip install as a superuser with ``sudo pip install google_images_download`` but it is not generally a good idea because it can cause issues with your system-level packages. From 69b9eb65d8284aa76c6542c313ec3bdd1741bd17 Mon Sep 17 00:00:00 2001 From: hellogan <44324739+hellogan@users.noreply.github.com> Date: Mon, 29 Apr 2019 14:31:17 -0400 Subject: [PATCH 66/83] Adding returning of URL (#181) return URLs when no_download flag is set --- google_images_download/google_images_download.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py index a8e56ecc..59a115ad 100755 --- a/google_images_download/google_images_download.py +++ b/google_images_download/google_images_download.py @@ -562,7 +562,7 @@ def download_image(self,image_url,image_format,main_directory,dir_name,count,pri if print_urls or no_download: print("Image URL: " + image_url) if no_download: - return "success","Printed url without downloading",None,None + return "success","Printed url without downloading",None,image_url try: req = Request(image_url, headers={ "User-Agent": "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17"}) From c7e5ae6808711aa760b8b762f7a4173d16aa0489 Mon Sep 17 00:00:00 2001 From: elahimanesh Date: Mon, 29 Apr 2019 23:12:26 +0430 Subject: [PATCH 67/83] add past-month to time argument (#193) Add past-month to time argument for search. --- google_images_download/google_images_download.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py index 59a115ad..9eec0190 100755 --- a/google_images_download/google_images_download.py +++ b/google_images_download/google_images_download.py @@ -88,7 +88,7 @@ def user_input(): parser.add_argument('-t', '--type', help='image type', type=str, required=False, choices=['face','photo','clipart','line-drawing','animated']) parser.add_argument('-w', '--time', help='image age', type=str, required=False, - choices=['past-24-hours','past-7-days']) + choices=['past-24-hours','past-7-days','past-month']) parser.add_argument('-wr', '--time_range', help='time range for the age of the image. 
should be in the format {"time_min":"MM/DD/YYYY","time_max":"MM/DD/YYYY"}', type=str, required=False) parser.add_argument('-a', '--aspect_ratio', help='comma separated additional words added to keywords', type=str, required=False, choices=['tall', 'square', 'wide', 'panoramic']) @@ -385,7 +385,7 @@ def build_url_parameters(self,arguments): 'usage_rights':[arguments['usage_rights'],{'labeled-for-reuse-with-modifications':'sur:fmc','labeled-for-reuse':'sur:fc','labeled-for-noncommercial-reuse-with-modification':'sur:fm','labeled-for-nocommercial-reuse':'sur:f'}], 'size':[arguments['size'],{'large':'isz:l','medium':'isz:m','icon':'isz:i','>400*300':'isz:lt,islt:qsvga','>640*480':'isz:lt,islt:vga','>800*600':'isz:lt,islt:svga','>1024*768':'visz:lt,islt:xga','>2MP':'isz:lt,islt:2mp','>4MP':'isz:lt,islt:4mp','>6MP':'isz:lt,islt:6mp','>8MP':'isz:lt,islt:8mp','>10MP':'isz:lt,islt:10mp','>12MP':'isz:lt,islt:12mp','>15MP':'isz:lt,islt:15mp','>20MP':'isz:lt,islt:20mp','>40MP':'isz:lt,islt:40mp','>70MP':'isz:lt,islt:70mp'}], 'type':[arguments['type'],{'face':'itp:face','photo':'itp:photo','clipart':'itp:clipart','line-drawing':'itp:lineart','animated':'itp:animated'}], - 'time':[arguments['time'],{'past-24-hours':'qdr:d','past-7-days':'qdr:w'}], + 'time':[arguments['time'],{'past-24-hours':'qdr:d','past-7-days':'qdr:w','past-month':'qdr:m'}], 'aspect_ratio':[arguments['aspect_ratio'],{'tall':'iar:t','square':'iar:s','wide':'iar:w','panoramic':'iar:xw'}], 'format':[arguments['format'],{'jpg':'ift:jpg','gif':'ift:gif','png':'ift:png','bmp':'ift:bmp','svg':'ift:svg','webp':'webp','ico':'ift:ico'}]} for key, value in params.items(): From caf1a7efb9245cd1c5769a48d54da031d9a8f651 Mon Sep 17 00:00:00 2001 From: jimgros Date: Tue, 30 Apr 2019 00:16:32 +0300 Subject: [PATCH 68/83] Updated get_next_tab (#177) Changed the code for more accurate url_item_name --- google_images_download/google_images_download.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py index 9eec0190..243f8487 100755 --- a/google_images_download/google_images_download.py +++ b/google_images_download/google_images_download.py @@ -229,9 +229,16 @@ def get_next_tab(self,s): url_item = url_item.replace('&', '&') start_line_2 = s.find('class="dtviD"') + s = s.replace('&', '&') start_content_2 = s.find(':', start_line_2 + 1) - end_content_2 = s.find('"', start_content_2 + 1) + end_content_2 = s.find('&usg=', start_content_2 + 1) url_item_name = str(s[start_content_2 + 1:end_content_2]) + if url_item_name[-3:] == '%3D': + end_content_3 = url_item_name.rfind(':') + url_item_name = url_item_name[:end_content_3] + url_item_name = url_item_name.replace(',g_1:',' ') + url_item_name = url_item_name.replace(',online_chips:',' ') + url_item_name = url_item_name.replace('+',' ') #print(url_item,url_item_name) return url_item,url_item_name,end_content From 6da49c8769852952ed86e2cf1c495548f6cfac8a Mon Sep 17 00:00:00 2001 From: Tussank Gupta <34658024+cslite@users.noreply.github.com> Date: Tue, 30 Apr 2019 03:54:00 +0530 Subject: [PATCH 69/83] Add feature to save the image source page URL (#197) * Add feature to save image source page url * update README --- README.rst | 4 ++++ .../google_images_download.py | 21 ++++++++++++++----- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/README.rst b/README.rst index 5bab145b..f9af4fa0 100644 --- a/README.rst +++ b/README.rst @@ -298,6 +298,10 @@ Arguments | | | | | | | This 
argument takes integer. Make sure the value of this argument is less than the value of limit | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| save_source | is | Creates a text file with list of downloaded images along with their source page paths. | +| | | | +| | | This argument takes a string, name of the text file. | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | no_download | nd | Print the URLs of the images on the console without downloading them. These image URLs can be used for debugging purposes | | | | | | | | This argument does not take any value. Just add '--no-download' or '-nd' in your query. | diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py index 243f8487..996da5df 100755 --- a/google_images_download/google_images_download.py +++ b/google_images_download/google_images_download.py @@ -40,7 +40,7 @@ "output_directory", "image_directory", "no_directory", "proxy", "similar_images", "specific_site", "print_urls", "print_size", "print_paths", "metadata", "extract_metadata", "socket_timeout", "thumbnail", "language", "prefix", "chromedriver", "related_images", "safe_search", "no_numbering", - "offset", "no_download"] + "offset", "no_download","save_source"] def user_input(): @@ -111,6 +111,7 @@ def user_input(): parser.add_argument('-nn', '--no_numbering', default=False, help="Allows you to exclude the default numbering of images", action="store_true") parser.add_argument('-of', '--offset', help="Where to start in the fetched links", type=str, required=False) parser.add_argument('-nd', '--no_download', default=False, help="Prints the URLs of the images and/or thumbnails without downloading them", action="store_true") + parser.add_argument('-is', '--save_source', help="creates a text file containing a list of downloaded images along with source page url", type=str, required=False) args = parser.parse_args() arguments = vars(args) @@ -503,7 +504,7 @@ def create_directories(self,main_directory, dir_name,thumbnail): # Download Images - def download_image_thumbnail(self,image_url,main_directory,dir_name,return_image_name,print_urls,socket_timeout,print_size,no_download): + def download_image_thumbnail(self,image_url,main_directory,dir_name,return_image_name,print_urls,socket_timeout,print_size,no_download,save_source,img_src): if print_urls or no_download: print("Image URL: " + image_url) if no_download: @@ -528,6 +529,11 @@ def download_image_thumbnail(self,image_url,main_directory,dir_name,return_image output_file = open(path, 'wb') output_file.write(data) output_file.close() + if save_source: + list_path = main_directory + "/" + save_source + ".txt" + list_file = open(list_path,'a') + list_file.write(path + '\t' + img_src + '\n') + list_file.close() except OSError as e: download_status = 'fail' download_message = "OSError on an image...trying next one..." 
+ " Error: " + str(e) @@ -565,7 +571,7 @@ def download_image_thumbnail(self,image_url,main_directory,dir_name,return_image # Download Images - def download_image(self,image_url,image_format,main_directory,dir_name,count,print_urls,socket_timeout,prefix,print_size,no_numbering,no_download): + def download_image(self,image_url,image_format,main_directory,dir_name,count,print_urls,socket_timeout,prefix,print_size,no_numbering,no_download,save_source,img_src): if print_urls or no_download: print("Image URL: " + image_url) if no_download: @@ -610,6 +616,11 @@ def download_image(self,image_url,image_format,main_directory,dir_name,count,pri output_file = open(path, 'wb') output_file.write(data) output_file.close() + if save_source: + list_path = main_directory + "/" + save_source + ".txt" + list_file = open(list_path,'a') + list_file.write(path + '\t' + img_src + '\n') + list_file.close() absolute_path = os.path.abspath(path) except OSError as e: download_status = 'fail' @@ -729,13 +740,13 @@ def _get_all_items(self,page,main_directory,dir_name,limit,arguments): print("\nImage Metadata: " + str(object)) #download the images - download_status,download_message,return_image_name,absolute_path = self.download_image(object['image_link'],object['image_format'],main_directory,dir_name,count,arguments['print_urls'],arguments['socket_timeout'],arguments['prefix'],arguments['print_size'],arguments['no_numbering'],arguments['no_download']) + download_status,download_message,return_image_name,absolute_path = self.download_image(object['image_link'],object['image_format'],main_directory,dir_name,count,arguments['print_urls'],arguments['socket_timeout'],arguments['prefix'],arguments['print_size'],arguments['no_numbering'],arguments['no_download'],arguments['save_source'],object['image_source']) print(download_message) if download_status == "success": # download image_thumbnails if arguments['thumbnail']: - download_status, download_message_thumbnail = self.download_image_thumbnail(object['image_thumbnail_url'],main_directory,dir_name,return_image_name,arguments['print_urls'],arguments['socket_timeout'],arguments['print_size'],arguments['no_download']) + download_status, download_message_thumbnail = self.download_image_thumbnail(object['image_thumbnail_url'],main_directory,dir_name,return_image_name,arguments['print_urls'],arguments['socket_timeout'],arguments['print_size'],arguments['no_download'],arguments['save_source'],object['image_source']) print(download_message_thumbnail) count += 1 From 18cc56b3ed8e06148fbacaf98b37a24c3f4a635c Mon Sep 17 00:00:00 2001 From: Hardik Vasa Date: Mon, 29 Apr 2019 19:12:22 -0700 Subject: [PATCH 70/83] added encode/decode to work around unicode character issue removed some commented print statements few updates in the readme file --- README.rst | 4 ++-- google_images_download/google_images_download.py | 13 ++++--------- setup.py | 2 +- 3 files changed, 7 insertions(+), 12 deletions(-) diff --git a/README.rst b/README.rst index f9af4fa0..e95d618c 100644 --- a/README.rst +++ b/README.rst @@ -184,7 +184,7 @@ Arguments +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | time | w | Denotes the time the image was uploaded/indexed. 
| | | | | -| | | `Possible values: past-24-hours, past-7-days` | +| | | `Possible values: past-24-hours, past-7-days, past-month, past-year` | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | time_range | wr | Denotes the time range for which you want to search the images | | | | | @@ -252,7 +252,7 @@ Arguments +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | extract_metadata | e | This option allows you to save metadata of all the downloaded images in a JSON file. | | | | | -| | | This file can be found in the ``logs/`` directory. The name of the file would be same as the keyword nam | +| | | This file can be found in the ``logs/`` directory. The name of the file would be same as the keyword name | | | | | | | | This argument does not take any value. Just add '--extract_metadata' or '-e' in your query. | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py index 996da5df..1670b075 100755 --- a/google_images_download/google_images_download.py +++ b/google_images_download/google_images_download.py @@ -88,7 +88,7 @@ def user_input(): parser.add_argument('-t', '--type', help='image type', type=str, required=False, choices=['face','photo','clipart','line-drawing','animated']) parser.add_argument('-w', '--time', help='image age', type=str, required=False, - choices=['past-24-hours','past-7-days','past-month']) + choices=['past-24-hours','past-7-days','past-month','past-year']) parser.add_argument('-wr', '--time_range', help='time range for the age of the image. 
should be in the format {"time_min":"MM/DD/YYYY","time_max":"MM/DD/YYYY"}', type=str, required=False) parser.add_argument('-a', '--aspect_ratio', help='comma separated additional words added to keywords', type=str, required=False, choices=['tall', 'square', 'wide', 'panoramic']) @@ -241,7 +241,6 @@ def get_next_tab(self,s): url_item_name = url_item_name.replace(',online_chips:',' ') url_item_name = url_item_name.replace('+',' ') - #print(url_item,url_item_name) return url_item,url_item_name,end_content @@ -310,7 +309,7 @@ def single_image(self,image_url): except OSError as e: raise e - print("completed ====> " + image_name) + print("completed ====> " + image_name.encode('raw_unicode_escape').decode('utf-8')) return def similar_images(self,similar_images): @@ -332,7 +331,6 @@ def similar_images(self,similar_images): newurl = "https://www.google.com/search?tbs=sbi:" + urll + "&site=search&sa=X" req2 = urllib.request.Request(newurl, headers=headers) resp2 = urllib.request.urlopen(req2) - # print(resp2.read()) l3 = content.find('/search?sa=X&q=') l4 = content.find(';', l3 + 19) urll2 = content[l3 + 19:l4] @@ -353,10 +351,8 @@ def similar_images(self,similar_images): urll = content[l1:l2] newurl = "https://www.google.com/search?tbs=sbi:" + urll + "&site=search&sa=X" - #print newurl req2 = urllib2.Request(newurl, headers=headers) resp2 = urllib2.urlopen(req2) - # print(resp2.read()) l3 = content.find('/search?sa=X&q=') l4 = content.find(';', l3 + 19) urll2 = content[l3 + 19:l4] @@ -393,7 +389,7 @@ def build_url_parameters(self,arguments): 'usage_rights':[arguments['usage_rights'],{'labeled-for-reuse-with-modifications':'sur:fmc','labeled-for-reuse':'sur:fc','labeled-for-noncommercial-reuse-with-modification':'sur:fm','labeled-for-nocommercial-reuse':'sur:f'}], 'size':[arguments['size'],{'large':'isz:l','medium':'isz:m','icon':'isz:i','>400*300':'isz:lt,islt:qsvga','>640*480':'isz:lt,islt:vga','>800*600':'isz:lt,islt:svga','>1024*768':'visz:lt,islt:xga','>2MP':'isz:lt,islt:2mp','>4MP':'isz:lt,islt:4mp','>6MP':'isz:lt,islt:6mp','>8MP':'isz:lt,islt:8mp','>10MP':'isz:lt,islt:10mp','>12MP':'isz:lt,islt:12mp','>15MP':'isz:lt,islt:15mp','>20MP':'isz:lt,islt:20mp','>40MP':'isz:lt,islt:40mp','>70MP':'isz:lt,islt:70mp'}], 'type':[arguments['type'],{'face':'itp:face','photo':'itp:photo','clipart':'itp:clipart','line-drawing':'itp:lineart','animated':'itp:animated'}], - 'time':[arguments['time'],{'past-24-hours':'qdr:d','past-7-days':'qdr:w','past-month':'qdr:m'}], + 'time':[arguments['time'],{'past-24-hours':'qdr:d','past-7-days':'qdr:w','past-month':'qdr:m','past-year':'qdr:y'}], 'aspect_ratio':[arguments['aspect_ratio'],{'tall':'iar:t','square':'iar:s','wide':'iar:w','panoramic':'iar:xw'}], 'format':[arguments['format'],{'jpg':'ift:jpg','gif':'ift:gif','png':'ift:png','bmp':'ift:bmp','svg':'ift:svg','webp':'webp','ico':'ift:ico'}]} for key, value in params.items(): @@ -433,7 +429,6 @@ def build_search_url(self,search_term,params,url,similar_images,specific_site,sa if safe_search: url = url + safe_search_string - # print(url) return url @@ -853,7 +848,7 @@ def download(self,arguments): i = 0 while i < len(search_keyword): # 2.for every main keyword iteration = "\n" + "Item no.: " + str(i + 1) + " -->" + " Item name = " + (pky) + (search_keyword[i]) + (sky) - print(iteration) + print(iteration.encode('raw_unicode_escape').decode('utf-8')) print("Evaluating...") search_term = pky + search_keyword[i] + sky diff --git a/setup.py b/setup.py index c47bcad5..13a6133a 100644 --- a/setup.py +++ b/setup.py @@ -2,7 
+2,7 @@ from codecs import open from os import path -__version__ = '2.5.0' +__version__ = '2.6.1' here = path.abspath(path.dirname(__file__)) From 77aba45fc0c0f4bc44230d695b893fcec18ce0bf Mon Sep 17 00:00:00 2001 From: Freddie Rice Date: Wed, 1 May 2019 00:53:06 -0400 Subject: [PATCH 71/83] add ability to download raw files (#230) --- google_images_download/google_images_download.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py index 1670b075..2aff1401 100755 --- a/google_images_download/google_images_download.py +++ b/google_images_download/google_images_download.py @@ -391,7 +391,7 @@ def build_url_parameters(self,arguments): 'type':[arguments['type'],{'face':'itp:face','photo':'itp:photo','clipart':'itp:clipart','line-drawing':'itp:lineart','animated':'itp:animated'}], 'time':[arguments['time'],{'past-24-hours':'qdr:d','past-7-days':'qdr:w','past-month':'qdr:m','past-year':'qdr:y'}], 'aspect_ratio':[arguments['aspect_ratio'],{'tall':'iar:t','square':'iar:s','wide':'iar:w','panoramic':'iar:xw'}], - 'format':[arguments['format'],{'jpg':'ift:jpg','gif':'ift:gif','png':'ift:png','bmp':'ift:bmp','svg':'ift:svg','webp':'webp','ico':'ift:ico'}]} + 'format':[arguments['format'],{'jpg':'ift:jpg','gif':'ift:gif','png':'ift:png','bmp':'ift:bmp','svg':'ift:svg','webp':'webp','ico':'ift:ico','raw':'ift:craw'}]} for key, value in params.items(): if value[0] is not None: ext_param = value[1][value[0]] From 10b8f5b77b2707ee05753c1a73b0938e4a8a2463 Mon Sep 17 00:00:00 2001 From: Hardik Vasa Date: Fri, 10 May 2019 01:03:16 -0700 Subject: [PATCH 72/83] Download only thumbnails as in #206 and #122 better error when downloading of raw page fails update docs for unicode issue with printing paths remove space character in image names and file names fix the issue where keyword was required when config file was given #211 print total number of errors when downloading multiple keywords silent mode download #199 does not create directories when --no_download flag is true --related_images folder/file naming bug in #234 and #232 initiation of Sphinx documentation couple other minor changes --- README.rst | 14 +- docs/.DS_Store | Bin 0 -> 8196 bytes docs/Makefile | 19 ++ docs/README.rst | 1 - docs/arguments.rst | 240 ++++++++++++++++++ docs/conf.py | 52 ++++ docs/contents.rst | 0 docs/examples.rst | 151 +++++++++++ docs/index.rst | 112 ++++++++ docs/installation.rst | 28 ++ docs/make.bat | 35 +++ docs/structure.rst | 7 + docs/troubleshooting.rst | 76 ++++++ docs/usage.rst | 29 +++ .../google_images_download.py | 186 +++++++++----- setup.py | 2 +- 16 files changed, 887 insertions(+), 65 deletions(-) create mode 100644 docs/.DS_Store create mode 100644 docs/Makefile delete mode 100644 docs/README.rst create mode 100644 docs/arguments.rst create mode 100644 docs/conf.py create mode 100644 docs/contents.rst create mode 100644 docs/examples.rst create mode 100644 docs/index.rst create mode 100644 docs/installation.rst create mode 100644 docs/make.bat create mode 100644 docs/structure.rst create mode 100644 docs/troubleshooting.rst create mode 100644 docs/usage.rst diff --git a/README.rst b/README.rst index e95d618c..d1c5515a 100644 --- a/README.rst +++ b/README.rst @@ -143,7 +143,7 @@ Arguments +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | format | f | Denotes the format/extension of 
the image that you want to download. | | | | | -| | | `Possible values: jpg, gif, png, bmp, svg, webp, ico` | +| | | `Possible values: jpg, gif, png, bmp, svg, webp, ico, raw` | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | color | co | Denotes the color filter that you want to apply to the images. | | | | | @@ -266,6 +266,12 @@ Arguments | | | | | | | This argument does not take any value. Just add '--thumbnail' or '-th' in your query. | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| thumbnail_only | tho | Downloads only thumbnails without downloading actual size images | +| | | | +| | | Thumbnails are saved in their own sub-directories inside of the main directory. | +| | | | +| | | This argument does not take any value. Just add '--thumbnail_only' or '-tho' in your query. | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | language | la | Defines the language filter. The search results are automatically returned in that language | | | | | | | | `Possible Values: Arabic, Chinese (Simplified), Chinese (Traditional), Czech, Danish, Dutch, English, Estonian. Finnish, | @@ -302,10 +308,14 @@ Arguments | | | | | | | This argument takes a string, name of the text file. | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| no_download | nd | Print the URLs of the images on the console without downloading them. These image URLs can be used for debugging purposes | +| no_download | nd | Print the URLs on the console without downloading images or thumbnails. These image URLs can be used for other purposes | | | | | | | | This argument does not take any value. Just add '--no-download' or '-nd' in your query. | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| silent_mode | sil | Remains silent. Does not print notification messages on the terminal/command prompt. | +| | | | +| | | This argument will override all the other print arguments (like print_urls, print_size, etc.) | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | help | h | show the help message regarding the usage of the above arguments | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ diff --git a/docs/.DS_Store b/docs/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..b0b71537b19d82866852277b71a7540e35a3063c GIT binary patch literal 8196 zcmeHML2uJA6n^f?mM|tNF=;#Ig2bVkS`L*agjAJr*Z~Qt2o8X{q|MrhEUB85Zj_>4 z_z&YJ@K3mLENj==v);A`@uZ!)={RY*qu2cO+)W3buD^)ma8Y-? 
zmw`X&RQ8|5QR)VfKa${}=R@Vqt03ydlU6*AdWjsTtvYPmw(L&jU^=bW>Q(3Xcvf|$ z$BkOmX*3?rX0~;3zyA32?BeRf^yBQ)wX}kSzDDS-dR~-YP|Ml)zCDV z=V3P%82MbV%%W#n|6JaWeqO)COk~I3lefbNp6bF&WB7&=yv5EL>Thm_$s$ P{h$BEAiZFUO=I9UAMf){ literal 0 HcmV?d00001 diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 00000000..298ea9e2 --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,19 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = sphinx-build +SOURCEDIR = . +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) \ No newline at end of file diff --git a/docs/README.rst b/docs/README.rst deleted file mode 100644 index 916c316f..00000000 --- a/docs/README.rst +++ /dev/null @@ -1 +0,0 @@ -Documents coming soon! \ No newline at end of file diff --git a/docs/arguments.rst b/docs/arguments.rst new file mode 100644 index 00000000..2ca516ff --- /dev/null +++ b/docs/arguments.rst @@ -0,0 +1,240 @@ +Arguments +========= + ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| Argument | Short hand | Description | ++===================+=============+===============================================================================================================================+ +| config_file | cf | You can pass the arguments inside a config file. This is an alternative to passing arguments on the command line directly. | +| | | | +| | | Please refer to the | +| | | `config file format `__ below | +| | | | +| | | * If 'config_file' argument is present, the program will use the config file and command line arguments will be discarded | +| | | * Config file can only be in **JSON** format | +| | | * Please refrain from passing invalid arguments from config file. Refer to the below arguments list | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| keywords | k | Denotes the keywords/key phrases you want to search for. For more than one keywords, wrap it in single quotes. | +| | | | +| | | Tips: | +| | | | +| | | * If you simply type the keyword, Google will best try to match it | +| | | * If you want to search for exact phrase, you can wrap the keywords in double quotes ("") | +| | | * If you want to search to contain either of the words provided, use **OR** between the words. | +| | | * If you want to explicitly not want a specific word use a minus sign before the word (-) | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| keywords_from_file| kf | Denotes the file name from where you would want to import the keywords. | +| | | | +| | | Add one keyword per line. Blank/Empty lines are truncated automatically. | +| | | | +| | | Only file types '.txt' or '.csv' are allowed. 
| ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| prefix_keywords | pk | Denotes additional words added before main keyword while making the search query. | +| | | | +| | | The final search query would be: | +| | | | +| | | So, for example, if the keyword is 'car' and prefix_keyword is 'red,yellow,blue', it will search and download images for | +| | | 'red car', 'yellow car' and 'blue car' individually | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| suffix_keywords | sk | Denotes additional words added after main keyword while making the search query. | +| | | | +| | | The final search query would be: | +| | | | +| | | So, for example, if the keyword is 'car' and suffix_keyword is 'red,yellow,blue', it will search and download images for | +| | | 'car red', 'car yellow' and 'car blue' individually | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| limit | l | Denotes number of images that you want to download. | +| | | | +| | | You can specify any integer value here. It will try and get all the images that it finds in the google image search page. | +| | | | +| | | If this value is not specified, it defaults to 100. | +| | | | +| | | **Note**: In case of occasional errors while downloading images, you could get less than 100 (if the limit is set to 100) | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| related_images | ri | This argument downloads a ton of images related to the keyword you provided. | +| | | | +| | | Google Images page returns list of related keywords to the keyword you have mentioned in the query. This tool downloads | +| | | images from each of those related keywords based on the limit you have mentioned in your query | +| | | | +| | | This argument does not take any value. Just add '--related_images' or '-ri' in your query. | +| | | | +| | | **Note:** This argument can download hundreds or thousands of additional images so please use this carefully. | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| format | f | Denotes the format/extension of the image that you want to download. | +| | | | +| | | `Possible values: jpg, gif, png, bmp, svg, webp, ico, raw` | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| color | co | Denotes the color filter that you want to apply to the images. | +| | | | +| | | `Possible values: red, orange, yellow, green, teal, blue, purple, pink, white, gray, black, brown` | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| color_type | ct | Denotes the color type you want to apply to the images. 
| +| | | | +| | | `Possible values: full-color, black-and-white, transparent` | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| usage_rights | r | Denotes the usage rights/licence under which the image is classified. | +| | | | +| | | `Possible values:` | +| | | | +| | | * `labeled-for-reuse-with-modifications`, | +| | | * `labeled-for-reuse`, | +| | | * `labeled-for-noncommercial-reuse-with-modification`, | +| | | * `labeled-for-nocommercial-reuse` | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| size | s | Denotes the relative size of the image to be downloaded. | +| | | | +| | | `Possible values: large, medium, icon, >400*300, >640*480, >800*600, >1024*768, >2MP, >4MP, >6MP, >8MP, >10MP, | +| | | >12MP, >15MP, >20MP, >40MP, >70MP` | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| exact_size | es | You can specify the exact size/resolution of the images | +| | | | +| | | This value of this argument can be specified as ```` where the fist integer stands for width of the image | +| | | and the second integer stands for the height of the image. For example, ``-es 1024,786`` | +| | | | +| | | **Note**: You cannot specify both 'size' and 'exact_size' arguments in the same query. You can only give one of them. | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| aspect_ratio | a | Denotes the aspect ratio of images to download. | +| | | | +| | | `Possible values: tall, square, wide, panoramic` | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| type | t | Denotes the type of image to be downloaded. | +| | | | +| | | `Possible values: face, photo, clip-art, line-drawing, animated` | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| time | w | Denotes the time the image was uploaded/indexed. | +| | | | +| | | `Possible values: past-24-hours, past-7-days, past-month, past-year` | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| time_range | wr | Denotes the time range for which you want to search the images | +| | | | +| | | The value of this parameter should be in the following format '{"time_min":"MM/DD/YYYY","time_max":"MM/DD/YYYY"}' | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| delay | d | Time to wait between downloading two images | +| | | | +| | | Time is to be specified in seconds. But you can have sub-second times by using decimal points. | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| url | u | Allows you search by image when you have the URL from the Google Images page. 
| +| | | It downloads images from the google images link provided | +| | | | +| | | If you are searching an image on the browser google images page, simply grab the browser URL and paste it in this parameter | +| | | It will download all the images seen on that page. | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| single_image | x | Allows you to download one image if the complete (absolute) URL of the image is provided | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| output_directory | o | Allows you specify the main directory name in which the images are downloaded. | +| | | | +| | | If not specified, it will default to 'downloads' directory. This directory is located in the path from where you run this code| +| | | | +| | | The directory structure would look like: ```` | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| image_directory | i | This lets you specify a directory inside of the main directory (output_directory) in which the images will be saved | +| | | | +| | | If not specified, it will default to the name of the keyword. | +| | | | +| | | The directory structure would look like: ```` | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| no_directory | n | This option allows you download images directly in the main directory (output_directory) without an image_directory | +| | | | +| | | The directory structure would look like: ```` | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| proxy | px | Allows you to specify proxy server setting for all your requests | +| | | | +| | | You can specify the proxy settings in 'IP:Port' format | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| similar_images | si | Reverse Image Search or 'Search by Image' as it is referred to on Google. | +| | | | +| | | Searches and downloads images that are similar to the absolute image link/url you provide. | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| specific_site | ss | Allows you to download images with keywords only from a specific website/domain name you mention. | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| print_urls | p | Print the URLs of the images on the console. These image URLs can be used for debugging purposes | +| | | | +| | | This argument does not take any value. Just add '--print_urls' or '-p' in your query. 
| ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| print_size | ps | Prints the size of the images on the console | +| | | | +| | | The size denoted the actual size of the image and not the size of the image on disk | +| | | | +| | | This argument does not take any value. Just add '--print_size' or '-ps' in your query. | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| print_paths | pp | Prints the list of all the absolute paths of the downloaded images | +| | | | +| | | When calling the script from another python file, this list will be saved in a variable (as shown in the example below) | +| | | | +| | | This argument also allows you to print the list on the console | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| metadata | m | Prints the metada of the image on the console. | +| | | | +| | | This includes image size, origin, image attributes, description, image URL, etc. | +| | | | +| | | This argument does not take any value. Just add '--metadata' or '-m' in your query. | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| extract_metadata | e | This option allows you to save metadata of all the downloaded images in a JSON file. | +| | | | +| | | This file can be found in the ``logs/`` directory. The name of the file would be same as the keyword name | +| | | | +| | | This argument does not take any value. Just add '--extract_metadata' or '-e' in your query. | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| socket_timeout | st | Allows you to specify the time to wait for socket connection. | +| | | | +| | | You could specify a higher timeout time for slow internet connection. The default value is 10 seconds. | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| thumbnail | th | Downloads image thumbnails corresponding to each image downloaded. | +| | | | +| | | Thumbnails are saved in their own sub-directories inside of the main directory. | +| | | | +| | | This argument does not take any value. Just add '--thumbnail' or '-th' in your query. | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| thumbnail_only | tho | Downloads only thumbnails without downloading actual size images | +| | | | +| | | Thumbnails are saved in their own sub-directories inside of the main directory. | +| | | | +| | | This argument does not take any value. Just add '--thumbnail_only' or '-tho' in your query. | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| language | la | Defines the language filter. 
The search results are automatically returned in that language | +| | | | +| | | `Possible Values: Arabic, Chinese (Simplified), Chinese (Traditional), Czech, Danish, Dutch, English, Estonian. Finnish, | +| | | French, German, Greek, Hebrew, Hungarian, Icelandic, Italian, Japanese, Korean, Latvianm, Lithuanian, Norwegian, Portuguese, | +| | | Polish, Romanian, Russian, Spanish, Swedish, Turkish` | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| prefix | pr | A word that you would want to prefix in front of actual image name. | +| | | | +| | | This feature can be used to rename files for image identification purpose. | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| chromedriver | cd | With this argument you can pass the path to the 'chromedriver'. | +| | | | +| | | The path looks like this: "path/to/chromedriver". In windows it will be "C:\\path\\to\\chromedriver.exe" | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| safe_search | sa | Searches for images with the Safe Search filter On | +| | | | +| | | And this filter will be Off by default if you do not specify the safe_search argument | +| | | | +| | | This argument does not take any value. Just add '--safe_search' or '-sa' in your query. | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| no_numbering | nn | When you specify this argument, the script does not add ordered numbering as prefix to the images it downloads | +| | | | +| | | If this argument is not specified, the images are numbered in order in which they are downloaded | +| | | | +| | | This argument does not take any value. Just add '--no_numbering' or '-nn' in your query. | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| offset | of | When you specify this argument, it will skip the offset number of links before it starts downloading images | +| | | | +| | | If this argument is not specified, the script will start downloading form the first link until the limit is reached | +| | | | +| | | This argument takes integer. Make sure the value of this argument is less than the value of limit | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| save_source | is | Creates a text file with list of downloaded images along with their source page paths. | +| | | | +| | | This argument takes a string, name of the text file. | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| no_download | nd | Print the URLs on the console without downloading images or thumbnails. These image URLs can be used for other purposes | +| | | | +| | | This argument does not take any value. Just add '--no-download' or '-nd' in your query. 
| ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| silent_mode | sil | Remains silent. Does not print notification messages on the terminal/command prompt. | +| | | | +| | | This argument will override all the other print arguments (like print_urls, print_size, etc.) | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| help | h | show the help message regarding the usage of the above arguments | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ + +**Note:** If ``single_image`` or ``url`` parameter is not present, then keywords is a mandatory parameter. No other parameters are mandatory. \ No newline at end of file diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 00000000..a9af1cf8 --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,52 @@ +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. For a full +# list see the documentation: +# http://www.sphinx-doc.org/en/master/config + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +# import os +# import sys +# sys.path.insert(0, os.path.abspath('.')) + + +# -- Project information ----------------------------------------------------- + +project = 'google-images-download' +copyright = '2019, Hardik Vasa' +author = 'Hardik Vasa' + + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ +] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] + + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = 'default' + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] diff --git a/docs/contents.rst b/docs/contents.rst new file mode 100644 index 00000000..e69de29b diff --git a/docs/examples.rst b/docs/examples.rst new file mode 100644 index 00000000..509e3952 --- /dev/null +++ b/docs/examples.rst @@ -0,0 +1,151 @@ +Config File Format +================== + +You can either pass the arguments directly from the command as in the examples below or you can pass it through a config file. Below is a sample of how a config +file looks. + +You can pass more than one record through a config file. 
The below sample consist of two set of records. The code will iterate through each of the record and +download images based on arguments passed. + +.. code:: json + + { + "Records": [ + { + "keywords": "apple", + "limit": 5, + "color": "green", + "print_urls": true + }, + { + "keywords": "universe", + "limit": 15, + "size": "large", + "print_urls": true + } + ] + } + + +Command line examples +===================== + +- If you are calling this library from another python file, below is the sample code + +.. code-block:: python + + from google_images_download import google_images_download #importing the library + + response = google_images_download.googleimagesdownload() #class instantiation + + arguments = {"keywords":"Polar bears,baloons,Beaches","limit":20,"print_urls":True} #creating list of arguments + paths = response.download(arguments) #passing the arguments to the function + print(paths) #printing absolute paths of the downloaded images + +- If you are passing arguments from a config file, simply pass the config_file argument with name of your JSON file + +.. code-block:: bash + + $ googleimagesdownload -cf example.json + +- Simple example of using keywords and limit arguments + +.. code-block:: bash + + $ googleimagesdownload --keywords "Polar bears, baloons, Beaches" --limit 20 + +- Using Suffix Keywords allows you to specify words after the main + keywords. For example if the ``keyword = car`` and + ``suffix keyword = 'red,blue'`` then it will first search for + ``car red`` and then ``car blue`` + +.. code-block:: bash + + $ googleimagesdownload --k "car" -sk 'red,blue,white' -l 10 + +- To use the short hand command + +.. code-block:: bash + + $ googleimagesdownload -k "Polar bears, baloons, Beaches" -l 20 + +- To download images with specific image extension/format + +.. code-block:: bash + + $ googleimagesdownload --keywords "logo" --format svg + +- To use color filters for the images + +.. code-block:: bash + + $ googleimagesdownload -k "playground" -l 20 -co red + +- To use non-English keywords for image search + +.. code-block:: bash + + $ googleimagesdownload -k "北极熊" -l 5 + +- To download images from the google images link + +.. code-block:: bash + + $ googleimagesdownload -k "sample" -u + +- To save images in specific main directory (instead of in 'downloads') + +.. code-block:: bash + + $ googleimagesdownload -k "boat" -o "boat_new" + +- To download one single image with the image URL + +.. code-block:: bash + + $ googleimagesdownload --keywords "baloons" --single_image + +- To download images with size and type constrains + +.. code-block:: bash + + $ googleimagesdownload --keywords "baloons" --size medium --type animated + +- To download images with specific usage rights + +.. code-block:: bash + + $ googleimagesdownload --keywords "universe" --usage_rights labeled-for-reuse + +- To download images with specific color type + +.. code-block:: bash + + $ googleimagesdownload --keywords "flowers" --color_type black-and-white + +- To download images with specific aspect ratio + +.. code-block:: bash + + $ googleimagesdownload --keywords "universe" --aspect_ratio panoramic + +- To download images which are similar to the image in the image URL that you provided (Reverse Image search). + +.. code-block:: bash + + $ googleimagesdownload -si -l 10 + +- To download images from specific website or domain name for a given keyword + +.. 
code-block:: bash + + $ googleimagesdownload --keywords "universe" --specific_site example.com + +===> The images would be downloaded in their own sub-directories inside the main directory +(either the one you provided or in 'downloads') in the same folder you are in. + + +Library extensions +================== + +Coming soon! \ No newline at end of file diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 00000000..71e4741a --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,112 @@ +Google Images Download Documentation +==================================== + +Summary +------- + +This is a command line python program to search keywords/key-phrases on Google Images +and optionally download images to your computer. You can also invoke this script from +another python file. + +This is a small and ready-to-run program. No dependencies are required to be installed +if you would only want to download up to 100 images per keyword. If you would want **more than 100 +images** per keyword, then you would need to install ``Selenium`` library along with ``chromedriver``. +Detailed instructions in the troubleshooting section. + + +Compatibility +------------- + +This program is compatible with both the versions of python - 2.x and 3.x (recommended). +It is a download-and-run program with no changes to the file. +You will just have to specify parameters through the command line. + + +Installation +------------ + +The guide provides detailed instructions on how to install the library. + +.. toctree:: + :maxdepth: 2 + + installation + + +Usage +----- + +The following section provides details on using the library - from CLI or by standard imports. + +.. toctree:: + :maxdepth: 2 + + usage + +Arguments +--------- + +This section provides all the arguments/parameters/options you can provide to this library. + +.. toctree:: + :maxdepth: 2 + + arguments + +Examples +-------- + +Many examples have been provided to help new users quickly ramp up the the usage. + +.. toctree:: + :maxdepth: 2 + + examples + +Troubleshooting +--------------- + +This section proviedes troubleshooting guide for commonly seen issues. + +.. toctree:: + :maxdepth: 2 + + troubleshooting + +Workflow +-------- + +Workflow showcases the algorithm used within this module to download the images. + +.. toctree:: + :maxdepth: 2 + + structure + + +Contribute +---------- + +Anyone is welcomed to contribute to this script. +If you would like to make a change, open a pull request. +For issues and discussion visit the +`Issue Tracker `__. + +The aim of this repo is to keep it simple, stand-alone, backward compatible and 3rd party dependency proof. + + +Disclaimer +---------- + +This program lets you download tons of images from Google. +Please do not download or use any image that violates its copyright terms. +Google Images is a search engine that merely indexes images and allows you to find them. +It does NOT produce its own images and, as such, it doesn't own copyright on any of them. +The original creators of the images own the copyrights. + +Images published in the United States are automatically copyrighted by their owners, +even if they do not explicitly carry a copyright warning. +You may not reproduce copyright images without their owner's permission, +except in "fair use" cases, +or you could risk running into lawyer's warnings, cease-and-desist letters, and copyright suits. +Please be very careful before its usage! 
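As an illustration of the API these patches document — including the ``no_download`` behaviour changed in patch 66 — below is a minimal, hypothetical sketch of calling the library from another Python file to collect image URLs without saving anything to disk. The argument names are taken from the arguments table added in this patch; that ``download()`` hands back the image URLs (rather than absolute file paths) when ``no_download`` is set is an assumption based on patch 66's "return URLs when no_download flag is set".

.. code-block:: python

    # Hypothetical sketch: gather image URLs only, no files written.
    # Assumes the package is installed (pip install google_images_download).
    from google_images_download import google_images_download

    response = google_images_download.googleimagesdownload()   # class instantiation

    arguments = {
        "keywords": "polar bears",   # search term
        "limit": 5,                  # number of results to process
        "no_download": True,         # print/collect URLs instead of downloading (patch 66)
        "print_urls": True,          # echo each URL on the console
    }

    # With no_download set, the returned structure is expected to carry the
    # image URLs in place of downloaded file paths (assumption, see above).
    paths = response.download(arguments)
    print(paths)

The same options map one-to-one onto the CLI flags (``-nd``, ``-p``), so the equivalent command-line invocation is already covered by the examples in ``docs/examples.rst``.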
diff --git a/docs/installation.rst b/docs/installation.rst new file mode 100644 index 00000000..c7114020 --- /dev/null +++ b/docs/installation.rst @@ -0,0 +1,28 @@ +.. _installation: + +Installation +============ + +You can use **one of the below methods** to download and use this repository. + +Install using pip +----------------- + +.. code-block:: bash + + $ pip install google_images_download + + +Manually install using CLI +-------------------------- + +.. code-block:: bash + + $ git clone https://github.com/hardikvasa/google-images-download.git + $ cd google-images-download && sudo python setup.py install + + +Manually install using UI +------------------------- + +Go to the `repo on github `__ ==> Click on 'Clone or Download' ==> Click on 'Download ZIP' and save it on your local disk. \ No newline at end of file diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 00000000..27f573b8 --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=. +set BUILDDIR=_build + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% + +:end +popd diff --git a/docs/structure.rst b/docs/structure.rst new file mode 100644 index 00000000..0c18e049 --- /dev/null +++ b/docs/structure.rst @@ -0,0 +1,7 @@ +Structure +========= + +Below diagram represents the algorithm logic to download images. + +.. figure:: http://www.zseries.in/flow-chart.png + :alt: \ No newline at end of file diff --git a/docs/troubleshooting.rst b/docs/troubleshooting.rst new file mode 100644 index 00000000..e6c7d6b8 --- /dev/null +++ b/docs/troubleshooting.rst @@ -0,0 +1,76 @@ +Troubleshooting Errors/Issues +============================= + +SSL Errors +---------- + +If you do see SSL errors on Mac for Python 3, +please go to Finder —> Applications —> Python 3 —> Click on the ‘Install Certificates.command’ +and run the file. + +googleimagesdownload: command not found +--------------------------------------- + +While using the above commands, if you get ``Error: -bash: googleimagesdownload: command not found`` then you have to set the correct path variable. + +To get the details of the repo, run the following command: + +.. code-block:: bash + + $ pip show -f google_images_download + +you will get the result like this: + +.. code-block:: bash + + Location: /Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages + Files: + ../../../bin/googleimagesdownload + +together they make: ``/Library/Frameworks/Python.framework/Versions/2.7/bin`` which you need add it to the path: + +.. code-block:: bash + + $ export PATH="/Library/Frameworks/Python.framework/Versions/2.7/bin" + + +[Errno 13] Permission denied creating directory 'downloads' +----------------------------------------------------------- + +When you run the command, it downloads the images in the current directory (the directory from where you are running the command). 
If you get a permission denied error while creating the `downloads directory`, move to a directory in which you have write permission and then run the command again.
+
+
+Permission denied while installing the library
+----------------------------------------------
+
+On Mac and Linux, if you get a permission denied error when installing the library using pip, try doing a user install.
+
+.. code-block:: bash
+
+    $ pip install google_images_download --user
+
+You can also run pip install as a superuser with ``sudo pip install google_images_download``, but it is not generally a good idea because it can cause issues with your system-level packages.
+
+
+Installing the chromedriver (with Selenium)
+-------------------------------------------
+
+If you want to download more than 100 images per keyword, you will need to install the 'selenium' library along with the 'chromedriver' extension.
+
+If you have pip-installed the library or have run the setup.py file, Selenium would have been installed automatically on your machine. You will also need the Chrome browser on your machine. For chromedriver:
+
+`Download the correct chromedriver `__ based on your operating system.
+
+On **Windows** or **Mac**, if for some reason the chromedriver gives you trouble, download it into the current directory and run the command.
+
+On Windows, however, the path to chromedriver has to be given in the following format:
+
+``C:\\complete\\path\\to\\chromedriver.exe``
+
+On **Linux**, if you are having issues installing the google chrome browser, refer to this `CentOS or Amazon Linux Guide `__
+or `Ubuntu Guide `__
+
+For **all operating systems** you will have to use the '--chromedriver' or '-cd' argument to specify the path of
+the chromedriver that you have downloaded on your machine.
+
+If on any rare occasion the chromedriver does not work for you, try downgrading it to a lower version.
diff --git a/docs/usage.rst b/docs/usage.rst
new file mode 100644
index 00000000..40a3acda
--- /dev/null
+++ b/docs/usage.rst
@@ -0,0 +1,29 @@
+Using the library from Command Line Interface
+=============================================
+
+If installed via pip or using the CLI, use the following command:
+
+.. code-block:: bash
+
+    $ googleimagesdownload [Arguments...]
+
+If downloaded via the UI, unzip the downloaded file, go to the 'google_images_download' directory and use one of the below commands:
+
+.. code-block:: bash
+
+    $ python3 google_images_download.py [Arguments...]
+    OR
+    $ python google_images_download.py [Arguments...]
+
+
+Using the library from another python file
+==========================================
+
+If you want to use this library from another python file, you can use it as shown below:
+
+.. 
code-block:: python + + from google_images_download import google_images_download + + response = google_images_download.googleimagesdownload() + absolute_image_paths = response.download({}) \ No newline at end of file diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py index 2aff1401..5848b85a 100755 --- a/google_images_download/google_images_download.py +++ b/google_images_download/google_images_download.py @@ -39,8 +39,8 @@ "exact_size", "aspect_ratio", "type", "time", "time_range", "delay", "url", "single_image", "output_directory", "image_directory", "no_directory", "proxy", "similar_images", "specific_site", "print_urls", "print_size", "print_paths", "metadata", "extract_metadata", "socket_timeout", - "thumbnail", "language", "prefix", "chromedriver", "related_images", "safe_search", "no_numbering", - "offset", "no_download","save_source"] + "thumbnail", "thumbnail_only", "language", "prefix", "chromedriver", "related_images", "safe_search", "no_numbering", + "offset", "no_download","save_source","silent_mode"] def user_input(): @@ -101,6 +101,7 @@ def user_input(): parser.add_argument('-e', '--extract_metadata', default=False, help="Dumps all the logs into a text file", action="store_true") parser.add_argument('-st', '--socket_timeout', default=False, help="Connection timeout waiting for the image to download", type=float) parser.add_argument('-th', '--thumbnail', default=False, help="Downloads image thumbnail along with the actual image", action="store_true") + parser.add_argument('-tho', '--thumbnail_only', default=False, help="Downloads only thumbnail without downloading actual images", action="store_true") parser.add_argument('-la', '--language', default=False, help="Defines the language filter. The search results are authomatically returned in that language", type=str, required=False, choices=['Arabic','Chinese (Simplified)','Chinese (Traditional)','Czech','Danish','Dutch','English','Estonian','Finnish','French','German','Greek','Hebrew','Hungarian','Icelandic','Italian','Japanese','Korean','Latvian','Lithuanian','Norwegian','Portuguese','Polish','Romanian','Russian','Spanish','Swedish','Turkish']) parser.add_argument('-pr', '--prefix', default=False, help="A word that you would want to prefix in front of each image name", type=str, required=False) @@ -111,6 +112,7 @@ def user_input(): parser.add_argument('-nn', '--no_numbering', default=False, help="Allows you to exclude the default numbering of images", action="store_true") parser.add_argument('-of', '--offset', help="Where to start in the fetched links", type=str, required=False) parser.add_argument('-nd', '--no_download', default=False, help="Prints the URLs of the images and/or thumbnails without downloading them", action="store_true") + parser.add_argument('-sil', '--silent_mode', default=False, help="Remains silent. Does not print notification messages on the terminal", action="store_true") parser.add_argument('-is', '--save_source', help="creates a text file containing a list of downloaded images along with source page url", type=str, required=False) args = parser.parse_args() @@ -137,7 +139,9 @@ def download_page(self,url): respData = str(resp.read()) return respData except Exception as e: - print("Could not open URL. Please check your internet connection and/or ssl settings") + print("Could not open URL. 
Please check your internet connection and/or ssl settings \n" + "If you are using proxy, make sure your proxy settings is configured correctly") + sys.exit() else: # If the Current Version of Python is 2.x try: headers = {} @@ -151,7 +155,9 @@ def download_page(self,url): page = response.read() return page except: - print("Could not open URL. Please check your internet connection and/or ssl settings") + print("Could not open URL. Please check your internet connection and/or ssl settings \n" + "If you are using proxy, make sure your proxy settings is configured correctly") + sys.exit() return "Page Not found" @@ -226,7 +232,7 @@ def get_next_tab(self,s): start_line = s.find('class="dtviD"') start_content = s.find('href="', start_line + 1) end_content = s.find('">', start_content + 1) - url_item = "https://www.google.com" + str(s[start_content+6:end_content]) + url_item = "https://www.google.com" + str(s[start_content + 6:end_content]) url_item = url_item.replace('&', '&') start_line_2 = s.find('class="dtviD"') @@ -234,14 +240,15 @@ def get_next_tab(self,s): start_content_2 = s.find(':', start_line_2 + 1) end_content_2 = s.find('&usg=', start_content_2 + 1) url_item_name = str(s[start_content_2 + 1:end_content_2]) - if url_item_name[-3:] == '%3D': - end_content_3 = url_item_name.rfind(':') - url_item_name = url_item_name[:end_content_3] - url_item_name = url_item_name.replace(',g_1:',' ') - url_item_name = url_item_name.replace(',online_chips:',' ') - url_item_name = url_item_name.replace('+',' ') - return url_item,url_item_name,end_content + chars = url_item_name.find(',g_1:') + chars_end = url_item_name.find(":", chars + 6) + if chars_end == -1: + updated_item_name = (url_item_name[chars + 5:]).replace("+", " ") + else: + updated_item_name = (url_item_name[chars+5:chars_end]).replace("+", " ") + + return url_item, updated_item_name, end_content # Getting all links with the help of '_images_get_next_image' @@ -252,9 +259,12 @@ def get_all_tabs(self,page): if item == "no_tabs": break else: - tabs[item_name] = item # Append all the links in the list named 'Links' - time.sleep(0.1) # Timer could be used to slow down the request for image downloads - page = page[end_content:] + if len(item_name) > 100 or item_name == "background-color": + break + else: + tabs[item_name] = item # Append all the links in the list named 'Links' + time.sleep(0.1) # Timer could be used to slow down the request for image downloads + page = page[end_content:] return tabs @@ -308,7 +318,6 @@ def single_image(self,image_url): raise e except OSError as e: raise e - print("completed ====> " + image_name.encode('raw_unicode_escape').decode('utf-8')) return @@ -466,7 +475,7 @@ def keywords_from_file(self,file_name): return search_keyword # make directories - def create_directories(self,main_directory, dir_name,thumbnail): + def create_directories(self,main_directory, dir_name,thumbnail,thumbnail_only): dir_name_thumbnail = dir_name + " - thumbnail" # make a search keyword directory try: @@ -477,7 +486,7 @@ def create_directories(self,main_directory, dir_name,thumbnail): sub_directory = os.path.join(main_directory, path) if not os.path.exists(sub_directory): os.makedirs(sub_directory) - if thumbnail: + if thumbnail or thumbnail_only: sub_directory_thumbnail = os.path.join(main_directory, dir_name_thumbnail) if not os.path.exists(sub_directory_thumbnail): os.makedirs(sub_directory_thumbnail) @@ -486,14 +495,13 @@ def create_directories(self,main_directory, dir_name,thumbnail): sub_directory = os.path.join(main_directory, 
path) if not os.path.exists(sub_directory): os.makedirs(sub_directory) - if thumbnail: + if thumbnail or thumbnail_only: sub_directory_thumbnail = os.path.join(main_directory, dir_name_thumbnail) if not os.path.exists(sub_directory_thumbnail): os.makedirs(sub_directory_thumbnail) except OSError as e: if e.errno != 17: raise - # time.sleep might help here pass return @@ -566,9 +574,12 @@ def download_image_thumbnail(self,image_url,main_directory,dir_name,return_image # Download Images - def download_image(self,image_url,image_format,main_directory,dir_name,count,print_urls,socket_timeout,prefix,print_size,no_numbering,no_download,save_source,img_src): - if print_urls or no_download: - print("Image URL: " + image_url) + def download_image(self,image_url,image_format,main_directory,dir_name,count,print_urls,socket_timeout,prefix,print_size,no_numbering,no_download,save_source,img_src,silent_mode,thumbnail_only): + if thumbnail_only: + return "success", "Skipping image download...", str(image_url[(image_url.rfind('/')) + 1:]), image_url + if not silent_mode: + if print_urls or no_download: + print("Image URL: " + image_url) if no_download: return "success","Printed url without downloading",None,image_url try: @@ -605,7 +616,7 @@ def download_image(self,image_url,image_format,main_directory,dir_name,count,pri if no_numbering: path = main_directory + "/" + dir_name + "/" + prefix + image_name else: - path = main_directory + "/" + dir_name + "/" + prefix + str(count) + ". " + image_name + path = main_directory + "/" + dir_name + "/" + prefix + str(count) + "." + image_name try: output_file = open(path, 'wb') @@ -625,12 +636,13 @@ def download_image(self,image_url,image_format,main_directory,dir_name,count,pri #return image name back to calling method to use it for thumbnail downloads download_status = 'success' - download_message = "Completed Image ====> " + prefix + str(count) + ". " + image_name - return_image_name = prefix + str(count) + ". " + image_name + download_message = "Completed Image ====> " + prefix + str(count) + "." + image_name + return_image_name = prefix + str(count) + "." 
+ image_name # image size parameter - if print_size: - print("Image Size: " + str(self.file_size(path))) + if not silent_mode: + if print_size: + print("Image Size: " + str(self.file_size(path))) except UnicodeEncodeError as e: download_status = 'fail' @@ -732,17 +744,20 @@ def _get_all_items(self,page,main_directory,dir_name,limit,arguments): #format the item for readability object = self.format_object(object) if arguments['metadata']: - print("\nImage Metadata: " + str(object)) + if not arguments["silent_mode"]: + print("\nImage Metadata: " + str(object)) #download the images - download_status,download_message,return_image_name,absolute_path = self.download_image(object['image_link'],object['image_format'],main_directory,dir_name,count,arguments['print_urls'],arguments['socket_timeout'],arguments['prefix'],arguments['print_size'],arguments['no_numbering'],arguments['no_download'],arguments['save_source'],object['image_source']) - print(download_message) + download_status,download_message,return_image_name,absolute_path = self.download_image(object['image_link'],object['image_format'],main_directory,dir_name,count,arguments['print_urls'],arguments['socket_timeout'],arguments['prefix'],arguments['print_size'],arguments['no_numbering'],arguments['no_download'],arguments['save_source'],object['image_source'],arguments["silent_mode"],arguments["thumbnail_only"]) + if not arguments["silent_mode"]: + print(download_message) if download_status == "success": # download image_thumbnails - if arguments['thumbnail']: + if arguments['thumbnail'] or arguments["thumbnail_only"]: download_status, download_message_thumbnail = self.download_image_thumbnail(object['image_thumbnail_url'],main_directory,dir_name,return_image_name,arguments['print_urls'],arguments['socket_timeout'],arguments['print_size'],arguments['no_download'],arguments['save_source'],object['image_source']) - print(download_message_thumbnail) + if not arguments["silent_mode"]: + print(download_message_thumbnail) count += 1 object['image_filename'] = return_image_name @@ -766,13 +781,55 @@ def _get_all_items(self,page,main_directory,dir_name,limit,arguments): # Bulk Download def download(self,arguments): - - #for input coming from other python files + paths_agg = {} + # for input coming from other python files if __name__ != "__main__": - for arg in args_list: - if arg not in arguments: - arguments[arg] = None - + # if the calling file contains config_file param + if 'config_file' in arguments: + records = [] + json_file = json.load(open(arguments['config_file'])) + for record in range(0, len(json_file['Records'])): + arguments = {} + for i in args_list: + arguments[i] = None + for key, value in json_file['Records'][record].items(): + arguments[key] = value + records.append(arguments) + total_errors = 0 + for rec in records: + paths, errors = self.download_executor(rec) + for i in paths: + paths_agg[i] = paths[i] + if not arguments["silent_mode"]: + if arguments['print_paths']: + print(paths.encode('raw_unicode_escape').decode('utf-8')) + total_errors = total_errors + errors + return paths_agg,total_errors + # if the calling file contains params directly + else: + paths, errors = self.download_executor(arguments) + for i in paths: + paths_agg[i] = paths[i] + if not arguments["silent_mode"]: + if arguments['print_paths']: + print(paths.encode('raw_unicode_escape').decode('utf-8')) + return paths_agg, errors + # for input coming from CLI + else: + paths, errors = self.download_executor(arguments) + for i in paths: + paths_agg[i] = paths[i] + 
if not arguments["silent_mode"]: + if arguments['print_paths']: + print(paths.encode('raw_unicode_escape').decode('utf-8')) + return paths_agg, errors + + def download_executor(self,arguments): + paths = {} + errorCount = None + for arg in args_list: + if arg not in arguments: + arguments[arg] = None ######Initialization and Validation of user arguments if arguments['keywords']: search_keyword = [str(item) for item in arguments['keywords'].split(',')] @@ -829,7 +886,6 @@ def download(self,arguments): '-------------------------------') sys.exit() - # If this argument is present, set the custom output directory if arguments['output_directory']: main_directory = arguments['output_directory'] @@ -841,15 +897,17 @@ def download(self,arguments): os.environ["http_proxy"] = arguments['proxy'] os.environ["https_proxy"] = arguments['proxy'] ######Initialization Complete - - paths = {} - for pky in prefix_keywords: - for sky in suffix_keywords: # 1.for every suffix keywords + total_errors = 0 + for pky in prefix_keywords: # 1.for every prefix keywords + for sky in suffix_keywords: # 2.for every suffix keywords i = 0 - while i < len(search_keyword): # 2.for every main keyword + while i < len(search_keyword): # 3.for every main keyword iteration = "\n" + "Item no.: " + str(i + 1) + " -->" + " Item name = " + (pky) + (search_keyword[i]) + (sky) - print(iteration.encode('raw_unicode_escape').decode('utf-8')) - print("Evaluating...") + if not arguments["silent_mode"]: + print(iteration.encode('raw_unicode_escape').decode('utf-8')) + print("Evaluating...") + else: + print("Downloading images for: " + (pky) + (search_keyword[i]) + (sky) + " ...") search_term = pky + search_keyword[i] + sky if arguments['image_directory']: @@ -859,7 +917,8 @@ def download(self,arguments): else: dir_name = search_term + ('-' + arguments['color'] if arguments['color'] else '') #sub-directory - self.create_directories(main_directory,dir_name,arguments['thumbnail']) #create directories in OS + if not arguments["no_download"]: + self.create_directories(main_directory,dir_name,arguments['thumbnail'],arguments['thumbnail_only']) #create directories in OS params = self.build_url_parameters(arguments) #building URL with params @@ -870,10 +929,11 @@ def download(self,arguments): else: raw_html = self.download_extended_page(url,arguments['chromedriver']) - if arguments['no_download']: - print("Starting to Print Image URLS") - else: - print("Starting Download...") + if not arguments["silent_mode"]: + if arguments['no_download']: + print("Getting URLs without downloading images...") + else: + print("Starting Download...") items,errorCount,abs_path = self._get_all_items(raw_html,main_directory,dir_name,limit,arguments) #get all image items and download images paths[pky + search_keyword[i] + sky] = abs_path @@ -899,31 +959,35 @@ def download(self,arguments): new_raw_html = self.download_page(value) # download page else: new_raw_html = self.download_extended_page(value,arguments['chromedriver']) - self.create_directories(main_directory, final_search_term,arguments['thumbnail']) + self.create_directories(main_directory, final_search_term,arguments['thumbnail'],arguments['thumbnail_only']) self._get_all_items(new_raw_html, main_directory, search_term + " - " + key, limit,arguments) i += 1 - print("\nErrors: " + str(errorCount) + "\n") - if arguments['print_paths']: - print(paths) - return paths + total_errors = total_errors + errorCount + if not arguments["silent_mode"]: + print("\nErrors: " + str(errorCount) + "\n") + return paths, 
total_errors #------------- Main Program -------------# def main(): records = user_input() + total_errors = 0 + t0 = time.time() # start the timer for arguments in records: if arguments['single_image']: # Download Single Image using a URL response = googleimagesdownload() response.single_image(arguments['single_image']) else: # or download multiple images based on keywords/keyphrase search - t0 = time.time() # start the timer response = googleimagesdownload() - paths = response.download(arguments) #wrapping response in a variable just for consistency + paths,errors = response.download(arguments) #wrapping response in a variable just for consistency + total_errors = total_errors + errors + t1 = time.time() # stop the timer + total_time = t1 - t0 # Calculating the total time required to crawl, find and download all the links of 60,000 images + if not arguments["silent_mode"]: print("\nEverything downloaded!") - t1 = time.time() # stop the timer - total_time = t1 - t0 # Calculating the total time required to crawl, find and download all the links of 60,000 images + print("Total errors: " + str(total_errors)) print("Total time taken: " + str(total_time) + " Seconds") if __name__ == "__main__": diff --git a/setup.py b/setup.py index 13a6133a..0a5d682b 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from codecs import open from os import path -__version__ = '2.6.1' +__version__ = '2.7.0' here = path.abspath(path.dirname(__file__)) From 25ae5278d20b8af4252a7dc9c6cbba6d01a696cd Mon Sep 17 00:00:00 2001 From: Hardik Vasa Date: Fri, 10 May 2019 02:11:12 -0700 Subject: [PATCH 73/83] minor sphinx doc update --- docs/index.rst | 3 --- 1 file changed, 3 deletions(-) diff --git a/docs/index.rst b/docs/index.rst index 71e4741a..e38d7ed4 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,6 +1,3 @@ -Google Images Download Documentation -==================================== - Summary ------- From f7ea870c7a13fb7dc094366b991ce2ed5ba28402 Mon Sep 17 00:00:00 2001 From: Hardik Vasa Date: Sat, 11 May 2019 21:19:00 -0700 Subject: [PATCH 74/83] minor doc CSS update --- docs/conf.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/docs/conf.py b/docs/conf.py index a9af1cf8..d66c7595 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -15,9 +15,17 @@ # sys.path.insert(0, os.path.abspath('.')) +html_static_path = ['_static'] + +html_context = { + 'css_files': [ + '_static/theme_overrides.css', # override wide tables in RTD theme + ], + } + # -- Project information ----------------------------------------------------- -project = 'google-images-download' +project = 'Google Images Download' copyright = '2019, Hardik Vasa' author = 'Hardik Vasa' From f694cce7f54943d4d15cf9900f6d3148b0c4ed31 Mon Sep 17 00:00:00 2001 From: Hardik Vasa Date: Sat, 11 May 2019 21:48:46 -0700 Subject: [PATCH 75/83] minor doc CSS update --- docs/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/conf.py b/docs/conf.py index d66c7595..ceffcaf2 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -19,7 +19,7 @@ html_context = { 'css_files': [ - '_static/theme_overrides.css', # override wide tables in RTD theme + '_static/overrides.css', # override wide tables in RTD theme ], } From b1bfee2449986678d6cd30f9768c8c9eab9f4846 Mon Sep 17 00:00:00 2001 From: Hardik Vasa Date: Sat, 11 May 2019 22:16:11 -0700 Subject: [PATCH 76/83] minor doc CSS updates --- docs/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/conf.py b/docs/conf.py index ceffcaf2..600ec18c 100644 --- 
a/docs/conf.py +++ b/docs/conf.py @@ -52,7 +52,7 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -html_theme = 'default' +html_theme = 'bizstyle' # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, From cf5ff4876b8384ef0d92e68b66fa648e494d2640 Mon Sep 17 00:00:00 2001 From: Hardik Vasa Date: Mon, 13 May 2019 15:25:33 -0700 Subject: [PATCH 77/83] fix the corrupt image bug fix the bug where wrong image format is downloaded addresses #81 #183 #189 #213 sphinx doc updates --- .gitignore | 3 + docs/_static/.DS_Store | Bin 0 -> 6148 bytes docs/_static/overrides.css | 14 ++++ docs/arguments.rst | 7 +- docs/conf.py | 11 ++- docs/contents.rst | 0 docs/examples.rst | 20 ++++- docs/index.rst | 76 +++++++++++------- docs/installation.rst | 3 +- docs/structure.rst | 5 +- docs/troubleshooting.rst | 11 +-- docs/usage.rst | 4 + .../google_images_download.py | 29 ++++--- 13 files changed, 133 insertions(+), 50 deletions(-) create mode 100644 docs/_static/.DS_Store create mode 100644 docs/_static/overrides.css delete mode 100644 docs/contents.rst diff --git a/.gitignore b/.gitignore index bfbb05d0..48de0972 100644 --- a/.gitignore +++ b/.gitignore @@ -40,6 +40,8 @@ output/*/index.html # Sphinx docs/_build +docs/.DS_Store +docs/_static/* # Cookiecutter output/ @@ -49,3 +51,4 @@ downloads/ # Logs logs/ + diff --git a/docs/_static/.DS_Store b/docs/_static/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..12e84d138d67e0b8ba6bb3e74306fff687a7dbbe GIT binary patch literal 6148 zcmeHKJxc>o5S-N%0TEJKzS2stwmCv9to;L$#6k!hBxrq|KRcQIP&`fx8w;6*-Fa_s z=e^t%w^;zTeBIsx3jlMvBi=ns&7ZqZ?5r|Ir1OkDUa-Y84!D_Af1hyfC3?J(wIA^} zJmv7P>o@CNzh;&H#5?-+F^nsj6p#W^Knh3!De!v*y!X=P7m12eKnh5KPX+w@(CCg` z;gA@g4u%*3h%=_cxQbk-qG3l_lndj8aRuhWF z?aa3*hjodHQa}of6*$lB!u$U{{fGH~OwvvYNP&N)fX!Bqt0iBldh6unyw^7R6aB+j o8|4hqiiy#Rx$#zf^`)-)n$Nq!Au;I82c4)t0`__. + +.. code:: python + + import os + from PIL import Image + + img_dir = r"path/to/downloads/directory" + for filename in os.listdir(img_dir): + try : + with Image.open(img_dir + "/" + filename) as im: + print('ok') + except : + print(img_dir + "/" + filename) + os.remove(img_dir + "/" + filename) diff --git a/docs/index.rst b/docs/index.rst index e38d7ed4..6f32a432 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,5 +1,11 @@ +====================== +Google Images Download +====================== + +.. index:: Summary + Summary -------- +======= This is a command line python program to search keywords/key-phrases on Google Images and optionally download images to your computer. You can also invoke this script from @@ -11,57 +17,67 @@ images** per keyword, then you would need to install ``Selenium`` library along Detailed instructions in the troubleshooting section. +.. index:: Compatability + Compatibility -------------- +============= This program is compatible with both the versions of python - 2.x and 3.x (recommended). It is a download-and-run program with no changes to the file. You will just have to specify parameters through the command line. +.. index:: Installation Installation ------------- +============ The guide provides detailed instructions on how to install the library. .. toctree:: - :maxdepth: 2 + :maxdepth: 3 installation +.. index:: Usage Usage ------ +===== The following section provides details on using the library - from CLI or by standard imports. .. 
toctree:: - :maxdepth: 2 + :maxdepth: 3 usage +.. index:: Arguments + Arguments ---------- +========= This section provides all the arguments/parameters/options you can provide to this library. .. toctree:: - :maxdepth: 2 + :maxdepth: 3 arguments +.. index:: Examples + Examples --------- +======== Many examples have been provided to help new users quickly ramp up the the usage. .. toctree:: - :maxdepth: 2 + :maxdepth: 3 examples +.. index:: Troubleshooting + Troubleshooting ---------------- +=============== This section proviedes troubleshooting guide for commonly seen issues. @@ -70,8 +86,10 @@ This section proviedes troubleshooting guide for commonly seen issues. troubleshooting +.. index:: Workflow + Workflow --------- +======== Workflow showcases the algorithm used within this module to download the images. @@ -80,9 +98,10 @@ Workflow showcases the algorithm used within this module to download the images. structure +.. index:: Contribute Contribute ----------- +========== Anyone is welcomed to contribute to this script. If you would like to make a change, open a pull request. @@ -91,19 +110,22 @@ For issues and discussion visit the The aim of this repo is to keep it simple, stand-alone, backward compatible and 3rd party dependency proof. +.. index:: Disclaimer Disclaimer ----------- - -This program lets you download tons of images from Google. -Please do not download or use any image that violates its copyright terms. -Google Images is a search engine that merely indexes images and allows you to find them. -It does NOT produce its own images and, as such, it doesn't own copyright on any of them. -The original creators of the images own the copyrights. - -Images published in the United States are automatically copyrighted by their owners, -even if they do not explicitly carry a copyright warning. -You may not reproduce copyright images without their owner's permission, -except in "fair use" cases, -or you could risk running into lawyer's warnings, cease-and-desist letters, and copyright suits. -Please be very careful before its usage! +========== + +.. warning:: + + This program lets you download tons of images from Google. + Please do not download or use any image that violates its copyright terms. + Google Images is a search engine that merely indexes images and allows you to find them. + It does NOT produce its own images and, as such, it doesn't own copyright on any of them. + The original creators of the images own the copyrights. + + Images published in the United States are automatically copyrighted by their owners, + even if they do not explicitly carry a copyright warning. + You may not reproduce copyright images without their owner's permission, + except in "fair use" cases, + or you could risk running into lawyer's warnings, cease-and-desist letters, and copyright suits. + Please be very careful before its usage! diff --git a/docs/installation.rst b/docs/installation.rst index c7114020..0e1362f5 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -1,5 +1,4 @@ -.. _installation: - +============ Installation ============ diff --git a/docs/structure.rst b/docs/structure.rst index 0c18e049..77e93faf 100644 --- a/docs/structure.rst +++ b/docs/structure.rst @@ -1,5 +1,6 @@ -Structure -========= +======== +Workflow +======== Below diagram represents the algorithm logic to download images. 
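For readers of this patch who cannot view the flow-chart image, here is a rough text sketch of the same flow. The method names and signatures below are taken from ``google_images_download.py`` as modified in this series, but the sketch is illustrative only: it skips argument validation, the related-images loop and most error handling, the function name ``download_flow`` is invented for the outline, and the ``url`` parameter stands in for the search URL that the module assembles from the keyword and ``params``.

.. code-block:: python

    from google_images_download import google_images_download

    def download_flow(arguments, url, main_directory, dir_name, limit):
        # Simplified outline of the per-keyword loop inside download_executor().
        gid = google_images_download.googleimagesdownload()

        # 1. create the keyword (and optional thumbnail) sub-directories
        gid.create_directories(main_directory, dir_name,
                               arguments['thumbnail'], arguments['thumbnail_only'])

        # 2. translate filters (color, size, usage rights, ...) into URL query params
        #    (the real code folds these params into the search URL; here `url` is passed in ready-made)
        params = gid.build_url_parameters(arguments)

        # 3. fetch the results page: a plain urllib fetch covers ~100 results,
        #    beyond that Selenium + chromedriver scrolls the page for more
        if limit < 101:
            raw_html = gid.download_page(url)
        else:
            raw_html = gid.download_extended_page(url, arguments['chromedriver'])

        # 4. parse each result's metadata and download the image (and thumbnail, if requested)
        items, error_count, abs_path = gid._get_all_items(raw_html, main_directory,
                                                          dir_name, limit, arguments)
        return items, error_count, abs_path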
diff --git a/docs/troubleshooting.rst b/docs/troubleshooting.rst index e6c7d6b8..4e48e979 100644 --- a/docs/troubleshooting.rst +++ b/docs/troubleshooting.rst @@ -1,15 +1,16 @@ +============================= Troubleshooting Errors/Issues ============================= SSL Errors ----------- +========== If you do see SSL errors on Mac for Python 3, please go to Finder —> Applications —> Python 3 —> Click on the ‘Install Certificates.command’ and run the file. googleimagesdownload: command not found ---------------------------------------- +======================================= While using the above commands, if you get ``Error: -bash: googleimagesdownload: command not found`` then you have to set the correct path variable. @@ -35,13 +36,13 @@ together they make: ``/Library/Frameworks/Python.framework/Versions/2.7/bin`` wh [Errno 13] Permission denied creating directory 'downloads' ------------------------------------------------------------ +=========================================================== When you run the command, it downloads the images in the current directory (the directory from where you are running the command). If you get permission denied error for creating the `downloads directory`, then move to a directory in which you have the write permission and then run the command again. Permission denied while installing the library ----------------------------------------------- +============================================== On MAC and Linux, when you get permission denied when installing the library using pip, try doing a user install. @@ -53,7 +54,7 @@ You can also run pip install as a superuser with ``sudo pip install google_image Installing the chromedriver (with Selenium) -------------------------------------------- +=========================================== If you would want to download more than 100 images per keyword, then you will need to install 'selenium' library along with 'chromedriver' extension. 
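In practice, the chromedriver path is passed like any other argument. Below is a small sketch, assuming the package and Selenium are installed and chromedriver has already been downloaded; the keyword and paths are placeholders, and on Windows the backslashes in the path must be doubled exactly as described above.

.. code-block:: python

    from google_images_download import google_images_download

    response = google_images_download.googleimagesdownload()
    arguments = {
        "keywords": "Polar bears",
        "limit": 150,                             # anything above 100 needs the Selenium code path
        "chromedriver": "/path/to/chromedriver",  # placeholder; on Windows e.g. "C:\\path\\to\\chromedriver.exe"
    }
    paths = response.download(arguments)

The command line equivalent is the '--chromedriver' (or '-cd') argument combined with '--limit'.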
diff --git a/docs/usage.rst b/docs/usage.rst index 40a3acda..38197628 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -1,3 +1,7 @@ +===== +Usage +===== + Using the library from Command Line Interface ============================================= diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py index 5848b85a..f07ccb3a 100755 --- a/google_images_download/google_images_download.py +++ b/google_images_download/google_images_download.py @@ -574,7 +574,7 @@ def download_image_thumbnail(self,image_url,main_directory,dir_name,return_image # Download Images - def download_image(self,image_url,image_format,main_directory,dir_name,count,print_urls,socket_timeout,prefix,print_size,no_numbering,no_download,save_source,img_src,silent_mode,thumbnail_only): + def download_image(self,image_url,image_format,main_directory,dir_name,count,print_urls,socket_timeout,prefix,print_size,no_numbering,no_download,save_source,img_src,silent_mode,thumbnail_only,format): if thumbnail_only: return "success", "Skipping image download...", str(image_url[(image_url.rfind('/')) + 1:]), image_url if not silent_mode: @@ -596,16 +596,27 @@ def download_image(self,image_url,image_format,main_directory,dir_name,count,pri data = response.read() response.close() + extensions = [".jpg", ".jpeg", ".gif", ".png", ".bmp", ".svg", ".webp", ".ico"] # keep everything after the last '/' image_name = str(image_url[(image_url.rfind('/')) + 1:]) - # if no extension then add it - # remove everything after the image name - if image_format == "": - image_name = image_name + "." + "jpg" - elif image_format == "jpeg": - image_name = image_name[:image_name.find(image_format) + 4] + if format: + if not image_format or image_format != format: + download_status = 'fail' + download_message = "Wrong image format returned. Skipping..." + return_image_name = '' + absolute_path = '' + return download_status, download_message, return_image_name, absolute_path + + if image_format == "" or not image_format or "." + image_format not in extensions: + download_status = 'fail' + download_message = "Invalid or missing image format. Skipping..." + return_image_name = '' + absolute_path = '' + return download_status, download_message, return_image_name, absolute_path + elif image_name.lower().find("." + image_format) < 0: + image_name = image_name + "." + image_format else: - image_name = image_name[:image_name.find(image_format) + 3] + image_name = image_name[:image_name.lower().find("." 
+ image_format) + (len(image_format) + 1)] # prefix name in image if prefix: @@ -748,7 +759,7 @@ def _get_all_items(self,page,main_directory,dir_name,limit,arguments): print("\nImage Metadata: " + str(object)) #download the images - download_status,download_message,return_image_name,absolute_path = self.download_image(object['image_link'],object['image_format'],main_directory,dir_name,count,arguments['print_urls'],arguments['socket_timeout'],arguments['prefix'],arguments['print_size'],arguments['no_numbering'],arguments['no_download'],arguments['save_source'],object['image_source'],arguments["silent_mode"],arguments["thumbnail_only"]) + download_status,download_message,return_image_name,absolute_path = self.download_image(object['image_link'],object['image_format'],main_directory,dir_name,count,arguments['print_urls'],arguments['socket_timeout'],arguments['prefix'],arguments['print_size'],arguments['no_numbering'],arguments['no_download'],arguments['save_source'],object['image_source'],arguments["silent_mode"],arguments["thumbnail_only"],arguments['format']) if not arguments["silent_mode"]: print(download_message) if download_status == "success": From b95c535ebbbc000aa5e8f146ccf434bf5b54664d Mon Sep 17 00:00:00 2001 From: Hardik Vasa Date: Mon, 13 May 2019 16:06:04 -0700 Subject: [PATCH 78/83] Migrate to Sphinx documentation --- README.rst | 579 +------------------------------------ docs/_static/.DS_Store | Bin 6148 -> 0 bytes docs/_static/overrides.css | 14 - 3 files changed, 15 insertions(+), 578 deletions(-) delete mode 100644 docs/_static/.DS_Store delete mode 100644 docs/_static/overrides.css diff --git a/README.rst b/README.rst index d1c5515a..3a2ea356 100644 --- a/README.rst +++ b/README.rst @@ -3,575 +3,26 @@ Google Images Download Python Script for 'searching' and 'downloading' hundreds of Google images to the local hard disk! -Contents - -.. contents:: :local: - -Summary -======= - -This is a command line python program to search keywords/key-phrases on Google Images -and optionally download images to your computer. You can also invoke this script from -another python file. - -This is a small and ready-to-run program. No dependencies are required to be installed -if you would only want to download up to 100 images per keyword. If you would want **more than 100 -images** per keyword, then you would need to install ``Selenium`` library along with ``chromedriver``. -Detailed instructions in the troubleshooting section. - - -Compatibility +Documentation ============= -This program is compatible with both the versions of python - 2.x and 3.x (recommended). -It is a download-and-run program with no changes to the file. -You will just have to specify parameters through the command line. - -Installation -============ - -You can use **one of the below methods** to download and use this repository. - -Using pip - -.. code-block:: bash - - $ pip install google_images_download - -Manually using CLI - -.. code-block:: bash - - $ git clone https://github.com/hardikvasa/google-images-download.git - $ cd google-images-download && sudo python setup.py install - -Manually using UI - -Go to the `repo on github `__ ==> Click on 'Clone or Download' ==> Click on 'Download ZIP' and save it on your local disk. - -Usage - Using Command Line Interface -==================================== - -If installed via pip or using CLI, use the following command: - -.. code-block:: bash - - $ googleimagesdownload [Arguments...] 
- -If downloaded via the UI, unzip the file downloaded, go to the 'google_images_download' directory and use one of the below commands: - -.. code-block:: bash - - $ python3 google_images_download.py [Arguments...] - OR - $ python google_images_download.py [Arguments...] - - -Usage - From another python file -================================ - -If you would want to use this library from another python file, you could use it as shown below: - -.. code-block:: python - - from google_images_download import google_images_download - - response = google_images_download.googleimagesdownload() - absolute_image_paths = response.download({}) - - -Arguments -========= - -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| Argument | Short hand | Description | -+===================+=============+===============================================================================================================================+ -| config_file | cf | You can pass the arguments inside a config file. This is an alternative to passing arguments on the command line directly. | -| | | | -| | | Please refer to the | -| | | `config file format `__ below | -| | | | -| | | * If 'config_file' argument is present, the program will use the config file and command line arguments will be discarded | -| | | * Config file can only be in **JSON** format | -| | | * Please refrain from passing invalid arguments from config file. Refer to the below arguments list | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| keywords | k | Denotes the keywords/key phrases you want to search for. For more than one keywords, wrap it in single quotes. | -| | | | -| | | Tips: | -| | | | -| | | * If you simply type the keyword, Google will best try to match it | -| | | * If you want to search for exact phrase, you can wrap the keywords in double quotes ("") | -| | | * If you want to search to contain either of the words provided, use **OR** between the words. | -| | | * If you want to explicitly not want a specific word use a minus sign before the word (-) | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| keywords_from_file| kf | Denotes the file name from where you would want to import the keywords. | -| | | | -| | | Add one keyword per line. Blank/Empty lines are truncated automatically. | -| | | | -| | | Only file types '.txt' or '.csv' are allowed. | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| prefix_keywords | pk | Denotes additional words added before main keyword while making the search query. | -| | | | -| | | The final search query would be: | -| | | | -| | | So, for example, if the keyword is 'car' and prefix_keyword is 'red,yellow,blue', it will search and download images for | -| | | 'red car', 'yellow car' and 'blue car' individually | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| suffix_keywords | sk | Denotes additional words added after main keyword while making the search query. 
| -| | | | -| | | The final search query would be: | -| | | | -| | | So, for example, if the keyword is 'car' and suffix_keyword is 'red,yellow,blue', it will search and download images for | -| | | 'car red', 'car yellow' and 'car blue' individually | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| limit | l | Denotes number of images that you want to download. | -| | | | -| | | You can specify any integer value here. It will try and get all the images that it finds in the google image search page. | -| | | | -| | | If this value is not specified, it defaults to 100. | -| | | | -| | | **Note**: In case of occasional errors while downloading images, you could get less than 100 (if the limit is set to 100) | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| related_images | ri | This argument downloads a ton of images related to the keyword you provided. | -| | | | -| | | Google Images page returns list of related keywords to the keyword you have mentioned in the query. This tool downloads | -| | | images from each of those related keywords based on the limit you have mentioned in your query | -| | | | -| | | This argument does not take any value. Just add '--related_images' or '-ri' in your query. | -| | | | -| | | **Note:** This argument can download hundreds or thousands of additional images so please use this carefully. | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| format | f | Denotes the format/extension of the image that you want to download. | -| | | | -| | | `Possible values: jpg, gif, png, bmp, svg, webp, ico, raw` | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| color | co | Denotes the color filter that you want to apply to the images. | -| | | | -| | | `Possible values: red, orange, yellow, green, teal, blue, purple, pink, white, gray, black, brown` | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| color_type | ct | Denotes the color type you want to apply to the images. | -| | | | -| | | `Possible values: full-color, black-and-white, transparent` | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| usage_rights | r | Denotes the usage rights/licence under which the image is classified. | -| | | | -| | | `Possible values:` | -| | | | -| | | * `labeled-for-reuse-with-modifications`, | -| | | * `labeled-for-reuse`, | -| | | * `labeled-for-noncommercial-reuse-with-modification`, | -| | | * `labeled-for-nocommercial-reuse` | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| size | s | Denotes the relative size of the image to be downloaded. 
| -| | | | -| | | `Possible values: large, medium, icon, >400*300, >640*480, >800*600, >1024*768, >2MP, >4MP, >6MP, >8MP, >10MP, | -| | | >12MP, >15MP, >20MP, >40MP, >70MP` | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| exact_size | es | You can specify the exact size/resolution of the images | -| | | | -| | | This value of this argument can be specified as ```` where the fist integer stands for width of the image | -| | | and the second integer stands for the height of the image. For example, ``-es 1024,786`` | -| | | | -| | | **Note**: You cannot specify both 'size' and 'exact_size' arguments in the same query. You can only give one of them. | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| aspect_ratio | a | Denotes the aspect ratio of images to download. | -| | | | -| | | `Possible values: tall, square, wide, panoramic` | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| type | t | Denotes the type of image to be downloaded. | -| | | | -| | | `Possible values: face, photo, clip-art, line-drawing, animated` | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| time | w | Denotes the time the image was uploaded/indexed. | -| | | | -| | | `Possible values: past-24-hours, past-7-days, past-month, past-year` | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| time_range | wr | Denotes the time range for which you want to search the images | -| | | | -| | | The value of this parameter should be in the following format '{"time_min":"MM/DD/YYYY","time_max":"MM/DD/YYYY"}' | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| delay | d | Time to wait between downloading two images | -| | | | -| | | Time is to be specified in seconds. But you can have sub-second times by using decimal points. | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| url | u | Allows you search by image when you have the URL from the Google Images page. | -| | | It downloads images from the google images link provided | -| | | | -| | | If you are searching an image on the browser google images page, simply grab the browser URL and paste it in this parameter | -| | | It will download all the images seen on that page. | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| single_image | x | Allows you to download one image if the complete (absolute) URL of the image is provided | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| output_directory | o | Allows you specify the main directory name in which the images are downloaded. 
| -| | | | -| | | If not specified, it will default to 'downloads' directory. This directory is located in the path from where you run this code| -| | | | -| | | The directory structure would look like: ```` | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| image_directory | i | This lets you specify a directory inside of the main directory (output_directory) in which the images will be saved | -| | | | -| | | If not specified, it will default to the name of the keyword. | -| | | | -| | | The directory structure would look like: ```` | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| no_directory | n | This option allows you download images directly in the main directory (output_directory) without an image_directory | -| | | | -| | | The directory structure would look like: ```` | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| proxy | px | Allows you to specify proxy server setting for all your requests | -| | | | -| | | You can specify the proxy settings in 'IP:Port' format | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| similar_images | si | Reverse Image Search or 'Search by Image' as it is referred to on Google. | -| | | | -| | | Searches and downloads images that are similar to the absolute image link/url you provide. | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| specific_site | ss | Allows you to download images with keywords only from a specific website/domain name you mention. | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| print_urls | p | Print the URLs of the images on the console. These image URLs can be used for debugging purposes | -| | | | -| | | This argument does not take any value. Just add '--print_urls' or '-p' in your query. | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| print_size | ps | Prints the size of the images on the console | -| | | | -| | | The size denoted the actual size of the image and not the size of the image on disk | -| | | | -| | | This argument does not take any value. Just add '--print_size' or '-ps' in your query. | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| print_paths | pp | Prints the list of all the absolute paths of the downloaded images | -| | | | -| | | When calling the script from another python file, this list will be saved in a variable (as shown in the example below) | -| | | | -| | | This argument also allows you to print the list on the console | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| metadata | m | Prints the metada of the image on the console. 
| -| | | | -| | | This includes image size, origin, image attributes, description, image URL, etc. | -| | | | -| | | This argument does not take any value. Just add '--metadata' or '-m' in your query. | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| extract_metadata | e | This option allows you to save metadata of all the downloaded images in a JSON file. | -| | | | -| | | This file can be found in the ``logs/`` directory. The name of the file would be same as the keyword name | -| | | | -| | | This argument does not take any value. Just add '--extract_metadata' or '-e' in your query. | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| socket_timeout | st | Allows you to specify the time to wait for socket connection. | -| | | | -| | | You could specify a higher timeout time for slow internet connection. The default value is 10 seconds. | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| thumbnail | th | Downloads image thumbnails corresponding to each image downloaded. | -| | | | -| | | Thumbnails are saved in their own sub-directories inside of the main directory. | -| | | | -| | | This argument does not take any value. Just add '--thumbnail' or '-th' in your query. | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| thumbnail_only | tho | Downloads only thumbnails without downloading actual size images | -| | | | -| | | Thumbnails are saved in their own sub-directories inside of the main directory. | -| | | | -| | | This argument does not take any value. Just add '--thumbnail_only' or '-tho' in your query. | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| language | la | Defines the language filter. The search results are automatically returned in that language | -| | | | -| | | `Possible Values: Arabic, Chinese (Simplified), Chinese (Traditional), Czech, Danish, Dutch, English, Estonian. Finnish, | -| | | French, German, Greek, Hebrew, Hungarian, Icelandic, Italian, Japanese, Korean, Latvianm, Lithuanian, Norwegian, Portuguese, | -| | | Polish, Romanian, Russian, Spanish, Swedish, Turkish` | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| prefix | pr | A word that you would want to prefix in front of actual image name. | -| | | | -| | | This feature can be used to rename files for image identification purpose. | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| chromedriver | cd | With this argument you can pass the path to the 'chromedriver'. | -| | | | -| | | The path looks like this: "path/to/chromedriver". 
In windows it will be "C:\\path\\to\\chromedriver.exe" | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| safe_search | sa | Searches for images with the Safe Search filter On | -| | | | -| | | And this filter will be Off by default if you do not specify the safe_search argument | -| | | | -| | | This argument does not take any value. Just add '--safe_search' or '-sa' in your query. | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| no_numbering | nn | When you specify this argument, the script does not add ordered numbering as prefix to the images it downloads | -| | | | -| | | If this argument is not specified, the images are numbered in order in which they are downloaded | -| | | | -| | | This argument does not take any value. Just add '--no_numbering' or '-nn' in your query. | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| offset | of | When you specify this argument, it will skip the offset number of links before it starts downloading images | -| | | | -| | | If this argument is not specified, the script will start downloading form the first link until the limit is reached | -| | | | -| | | This argument takes integer. Make sure the value of this argument is less than the value of limit | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| save_source | is | Creates a text file with list of downloaded images along with their source page paths. | -| | | | -| | | This argument takes a string, name of the text file. | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| no_download | nd | Print the URLs on the console without downloading images or thumbnails. These image URLs can be used for other purposes | -| | | | -| | | This argument does not take any value. Just add '--no-download' or '-nd' in your query. | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| silent_mode | sil | Remains silent. Does not print notification messages on the terminal/command prompt. | -| | | | -| | | This argument will override all the other print arguments (like print_urls, print_size, etc.) | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ -| help | h | show the help message regarding the usage of the above arguments | -+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ - - -**Note:** If ``single_image`` or ``url`` parameter is not present, then keywords is a mandatory parameter. No other parameters are mandatory. - -Config File Format -================== - -You can either pass the arguments directly from the command as in the examples below or you can pass it through a config file. Below is a sample of how a config -file looks. 
- -You can pass more than one record through a config file. The below sample consist of two set of records. The code will iterate through each of the record and -download images based on arguments passed. - -.. code:: json - - { - "Records": [ - { - "keywords": "apple", - "limit": 5, - "color": "green", - "print_urls": true - }, - { - "keywords": "universe", - "limit": 15, - "size": "large", - "print_urls": true - } - ] - } - - -Examples -======== - -- If you are calling this library from another python file, below is the sample code - -.. code-block:: python - - from google_images_download import google_images_download #importing the library - - response = google_images_download.googleimagesdownload() #class instantiation - - arguments = {"keywords":"Polar bears,baloons,Beaches","limit":20,"print_urls":True} #creating list of arguments - paths = response.download(arguments) #passing the arguments to the function - print(paths) #printing absolute paths of the downloaded images - -- If you are passing arguments from a config file, simply pass the config_file argument with name of your JSON file - -.. code-block:: bash - - $ googleimagesdownload -cf example.json - -- Simple example of using keywords and limit arguments - -.. code-block:: bash - - $ googleimagesdownload --keywords "Polar bears, baloons, Beaches" --limit 20 - -- Using Suffix Keywords allows you to specify words after the main - keywords. For example if the ``keyword = car`` and - ``suffix keyword = 'red,blue'`` then it will first search for - ``car red`` and then ``car blue`` - -.. code-block:: bash - - $ googleimagesdownload --k "car" -sk 'red,blue,white' -l 10 - -- To use the short hand command - -.. code-block:: bash - - $ googleimagesdownload -k "Polar bears, baloons, Beaches" -l 20 - -- To download images with specific image extension/format - -.. code-block:: bash - - $ googleimagesdownload --keywords "logo" --format svg - -- To use color filters for the images - -.. code-block:: bash +`Link to Documentation `__. - $ googleimagesdownload -k "playground" -l 20 -co red - -- To use non-English keywords for image search - -.. code-block:: bash - - $ googleimagesdownload -k "北极熊" -l 5 - -- To download images from the google images link - -.. code-block:: bash - - $ googleimagesdownload -k "sample" -u - -- To save images in specific main directory (instead of in 'downloads') - -.. code-block:: bash - - $ googleimagesdownload -k "boat" -o "boat_new" - -- To download one single image with the image URL - -.. code-block:: bash - - $ googleimagesdownload --keywords "baloons" --single_image - -- To download images with size and type constrains - -.. code-block:: bash - - $ googleimagesdownload --keywords "baloons" --size medium --type animated - -- To download images with specific usage rights - -.. code-block:: bash - - $ googleimagesdownload --keywords "universe" --usage_rights labeled-for-reuse - -- To download images with specific color type - -.. code-block:: bash - - $ googleimagesdownload --keywords "flowers" --color_type black-and-white - -- To download images with specific aspect ratio - -.. code-block:: bash - - $ googleimagesdownload --keywords "universe" --aspect_ratio panoramic - -- To download images which are similar to the image in the image URL that you provided (Reverse Image search). - -.. code-block:: bash - - $ googleimagesdownload -si -l 10 - -- To download images from specific website or domain name for a given keyword - -.. 
code-block:: bash - - $ googleimagesdownload --keywords "universe" --specific_site example.com - -===> The images would be downloaded in their own sub-directories inside the main directory -(either the one you provided or in 'downloads') in the same folder you are in. - --------------- - -Troubleshooting Errors/Issues -============================= - -**#~~~# SSL Errors** - -If you do see SSL errors on Mac for Python 3, -please go to Finder —> Applications —> Python 3 —> Click on the ‘Install Certificates.command’ -and run the file. - -**#~~~# googleimagesdownload: command not found** - -While using the above commands, if you get ``Error: -bash: googleimagesdownload: command not found`` then you have to set the correct path variable. - -To get the details of the repo, run the following command: - -.. code-block:: bash - - $ pip show -f google_images_download - -you will get the result like this: - -.. code-block:: bash - - Location: /Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages - Files: - ../../../bin/googleimagesdownload - -together they make: ``/Library/Frameworks/Python.framework/Versions/2.7/bin`` which you need add it to the path: - -.. code-block:: bash - - $ export PATH="/Library/Frameworks/Python.framework/Versions/2.7/bin" - - -**#~~~# [Errno 13] Permission denied creating directory 'downloads'** - -When you run the command, it downloads the images in the current directory (the directory from where you are running the command). If you get permission denied error for creating the `downloads directory`, then move to a directory in which you have the write permission and then run the command again. - - -**#~~~# Permission denied while installing the library** - -On MAC and Linux, when you get permission denied when installing the library using pip, try doing a user install. - -.. code-block:: bash - - $ pip install google_images_download --user - -You can also run pip install as a superuser with ``sudo pip install google_images_download`` but it is not generally a good idea because it can cause issues with your system-level packages. - - -**#~~~# Installing the chromedriver (with Selenium)** - -If you would want to download more than 100 images per keyword, then you will need to install 'selenium' library along with 'chromedriver' extension. - -If you have pip-installed the library or had run the setup.py file, Selenium would have automatically installed on your machine. You will also need Chrome browser on your machine. For chromedriver: - -`Download the correct chromedriver `__ based on your operating system. - -On **Windows** or **MAC** if for some reason the chromedriver gives you trouble, download it under the current directory and run the command. - -On windows however, the path to chromedriver has to be given in the following format: - -``C:\\complete\\path\\to\\chromedriver.exe`` - -On **Linux** if you are having issues installing google chrome browser, refer to this `CentOS or Amazon Linux Guide `__ -or `Ubuntu Guide `__ - -For **All the operating systems** you will have to use '--chromedriver' or '-cd' argument to specify the path of -chromedriver that you have downloaded in your machine. - -If on any rare occasion the chromedriver does not work for you, try downgrading it to a lower version. - -Structure -========= - -Below diagram represents the algorithm logic to download images. - -.. figure:: http://www.zseries.in/flow-chart.png - :alt: - -Contribute -========== - -Anyone is welcomed to contribute to this script. 
-If you would like to make a change, open a pull request. -For issues and discussion visit the -`Issue Tracker `__. - -The aim of this repo is to keep it simple, stand-alone, backward compatible and 3rd party dependency proof. Disclaimer ========== -This program lets you download tons of images from Google. -Please do not download or use any image that violates its copyright terms. -Google Images is a search engine that merely indexes images and allows you to find them. -It does NOT produce its own images and, as such, it doesn't own copyright on any of them. -The original creators of the images own the copyrights. +.. warning:: + + This program lets you download tons of images from Google. + Please do not download or use any image that violates its copyright terms. + Google Images is a search engine that merely indexes images and allows you to find them. + It does NOT produce its own images and, as such, it doesn't own copyright on any of them. + The original creators of the images own the copyrights. -Images published in the United States are automatically copyrighted by their owners, -even if they do not explicitly carry a copyright warning. -You may not reproduce copyright images without their owner's permission, -except in "fair use" cases, -or you could risk running into lawyer's warnings, cease-and-desist letters, and copyright suits. -Please be very careful before its usage! + Images published in the United States are automatically copyrighted by their owners, + even if they do not explicitly carry a copyright warning. + You may not reproduce copyright images without their owner's permission, + except in "fair use" cases, + or you could risk running into lawyer's warnings, cease-and-desist letters, and copyright suits. + Please be very careful before its usage! Use this script/code only for educational purposes. diff --git a/docs/_static/.DS_Store b/docs/_static/.DS_Store deleted file mode 100644 index 12e84d138d67e0b8ba6bb3e74306fff687a7dbbe..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHKJxc>o5S-N%0TEJKzS2stwmCv9to;L$#6k!hBxrq|KRcQIP&`fx8w;6*-Fa_s z=e^t%w^;zTeBIsx3jlMvBi=ns&7ZqZ?5r|Ir1OkDUa-Y84!D_Af1hyfC3?J(wIA^} zJmv7P>o@CNzh;&H#5?-+F^nsj6p#W^Knh3!De!v*y!X=P7m12eKnh5KPX+w@(CCg` z;gA@g4u%*3h%=_cxQbk-qG3l_lndj8aRuhWF z?aa3*hjodHQa}of6*$lB!u$U{{fGH~OwvvYNP&N)fX!Bqt0iBldh6unyw^7R6aB+j o8|4hqiiy#Rx$#zf^`)-)n$Nq!Au;I82c4)t0 Date: Mon, 13 May 2019 16:17:22 -0700 Subject: [PATCH 79/83] minor doc changes --- docs/_static/overrides.css | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 docs/_static/overrides.css diff --git a/docs/_static/overrides.css b/docs/_static/overrides.css new file mode 100644 index 00000000..68eadba4 --- /dev/null +++ b/docs/_static/overrides.css @@ -0,0 +1,7 @@ +table.docutils td, table.docutils th { + padding: 1px 8px 1px 5px; + border-top: 0; + border-left: 0; + border-right: 1px dotted #33B8FF; + border-bottom: 1px dotted #33B8FF; +} \ No newline at end of file From 77660787242b543041789a9483ffc65df906bac9 Mon Sep 17 00:00:00 2001 From: Hardik Vasa Date: Mon, 13 May 2019 16:17:58 -0700 Subject: [PATCH 80/83] minor doc changes --- README.rst | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/README.rst b/README.rst index 3a2ea356..36c29eb0 100644 --- a/README.rst +++ b/README.rst @@ -12,17 +12,15 @@ Documentation Disclaimer ========== -.. warning:: - - This program lets you download tons of images from Google. 
- Please do not download or use any image that violates its copyright terms. - Google Images is a search engine that merely indexes images and allows you to find them. - It does NOT produce its own images and, as such, it doesn't own copyright on any of them. - The original creators of the images own the copyrights. - - Images published in the United States are automatically copyrighted by their owners, - even if they do not explicitly carry a copyright warning. - You may not reproduce copyright images without their owner's permission, - except in "fair use" cases, - or you could risk running into lawyer's warnings, cease-and-desist letters, and copyright suits. - Please be very careful before its usage! Use this script/code only for educational purposes. +This program lets you download tons of images from Google. +Please do not download or use any image that violates its copyright terms. +Google Images is a search engine that merely indexes images and allows you to find them. +It does NOT produce its own images and, as such, it doesn't own copyright on any of them. +The original creators of the images own the copyrights. + +Images published in the United States are automatically copyrighted by their owners, +even if they do not explicitly carry a copyright warning. +You may not reproduce copyright images without their owner's permission, +except in "fair use" cases, +or you could risk running into lawyer's warnings, cease-and-desist letters, and copyright suits. +Please be very careful before its usage! Use this script/code only for educational purposes. From 7679d9bf1163fc07098bb7b0a801ebc3d80e527f Mon Sep 17 00:00:00 2001 From: Hardik Vasa Date: Mon, 13 May 2019 17:05:40 -0700 Subject: [PATCH 81/83] Doc changes --- README.rst | 5 ++++- docs/conf.py | 8 ++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/README.rst b/README.rst index 36c29eb0..b1e08f65 100644 --- a/README.rst +++ b/README.rst @@ -6,7 +6,10 @@ Python Script for 'searching' and 'downloading' hundreds of Google images to the Documentation ============= -`Link to Documentation `__. +* `Documentation Homepage `__ +* `Installation `__ +* `Input arguments `__ +* `Examples and Code Samples `__ Disclaimer diff --git a/docs/conf.py b/docs/conf.py index 81721a54..339dc57f 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -20,11 +20,11 @@ html_static_path = ['_static'] +def setup(app): + app.add_stylesheet('overrides.css') # may also be an URL + html_context = { - 'css_files': [ - '_static/overrides.css', # override wide tables in RTD theme - ], - "display_github": True, # Add 'Edit on Github' link instead of 'View page source' + "display_github": False, # Add 'Edit on Github' link instead of 'View page source' "last_updated": True, "commit": False, } From 9c3cb54e877a896a37b36fdee4bd49ccd1b57f65 Mon Sep 17 00:00:00 2001 From: Oli Date: Tue, 21 May 2019 18:26:29 +0100 Subject: [PATCH 82/83] Skip images with certain urls (#213) * Add flag to ignore certain urls * Updated README and parser arguments * Fix error on missing -iu option Resolves issues related to pull request #213. 
Removed duplicated validation --- google_images_download/google_images_download.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py index f07ccb3a..84b055a9 100755 --- a/google_images_download/google_images_download.py +++ b/google_images_download/google_images_download.py @@ -40,7 +40,7 @@ "output_directory", "image_directory", "no_directory", "proxy", "similar_images", "specific_site", "print_urls", "print_size", "print_paths", "metadata", "extract_metadata", "socket_timeout", "thumbnail", "thumbnail_only", "language", "prefix", "chromedriver", "related_images", "safe_search", "no_numbering", - "offset", "no_download","save_source","silent_mode"] + "offset", "no_download","save_source","silent_mode","ignore_urls"] def user_input(): @@ -112,6 +112,7 @@ def user_input(): parser.add_argument('-nn', '--no_numbering', default=False, help="Allows you to exclude the default numbering of images", action="store_true") parser.add_argument('-of', '--offset', help="Where to start in the fetched links", type=str, required=False) parser.add_argument('-nd', '--no_download', default=False, help="Prints the URLs of the images and/or thumbnails without downloading them", action="store_true") + parser.add_argument('-iu', '--ignore_urls', default=False, help="delimited list input of image urls/keywords to ignore", type=str) parser.add_argument('-sil', '--silent_mode', default=False, help="Remains silent. Does not print notification messages on the terminal", action="store_true") parser.add_argument('-is', '--save_source', help="creates a text file containing a list of downloaded images along with source page url", type=str, required=False) @@ -507,7 +508,7 @@ def create_directories(self,main_directory, dir_name,thumbnail,thumbnail_only): # Download Images - def download_image_thumbnail(self,image_url,main_directory,dir_name,return_image_name,print_urls,socket_timeout,print_size,no_download,save_source,img_src): + def download_image_thumbnail(self,image_url,main_directory,dir_name,return_image_name,print_urls,socket_timeout,print_size,no_download,save_source,img_src,ignore_urls): if print_urls or no_download: print("Image URL: " + image_url) if no_download: @@ -574,7 +575,10 @@ def download_image_thumbnail(self,image_url,main_directory,dir_name,return_image # Download Images - def download_image(self,image_url,image_format,main_directory,dir_name,count,print_urls,socket_timeout,prefix,print_size,no_numbering,no_download,save_source,img_src,silent_mode,thumbnail_only,format): + def download_image(self,image_url,image_format,main_directory,dir_name,count,print_urls,socket_timeout,prefix,print_size,no_numbering,no_download,save_source,img_src,silent_mode,thumbnail_only,format,ignore_urls): + if ignore_urls: + if any(url in image_url for url in ignore_urls.split(',')): + return "fail","Image ignored",None,None if thumbnail_only: return "success", "Skipping image download...", str(image_url[(image_url.rfind('/')) + 1:]), image_url if not silent_mode: @@ -759,14 +763,14 @@ def _get_all_items(self,page,main_directory,dir_name,limit,arguments): print("\nImage Metadata: " + str(object)) #download the images - download_status,download_message,return_image_name,absolute_path = 
self.download_image(object['image_link'],object['image_format'],main_directory,dir_name,count,arguments['print_urls'],arguments['socket_timeout'],arguments['prefix'],arguments['print_size'],arguments['no_numbering'],arguments['no_download'],arguments['save_source'],object['image_source'],arguments["silent_mode"],arguments["thumbnail_only"],arguments['format']) + download_status,download_message,return_image_name,absolute_path = self.download_image(object['image_link'],object['image_format'],main_directory,dir_name,count,arguments['print_urls'],arguments['socket_timeout'],arguments['prefix'],arguments['print_size'],arguments['no_numbering'],arguments['no_download'],arguments['save_source'],object['image_source'],arguments["silent_mode"],arguments["thumbnail_only"],arguments['format'],arguments['ignore_urls']) if not arguments["silent_mode"]: print(download_message) if download_status == "success": # download image_thumbnails if arguments['thumbnail'] or arguments["thumbnail_only"]: - download_status, download_message_thumbnail = self.download_image_thumbnail(object['image_thumbnail_url'],main_directory,dir_name,return_image_name,arguments['print_urls'],arguments['socket_timeout'],arguments['print_size'],arguments['no_download'],arguments['save_source'],object['image_source']) + download_status, download_message_thumbnail = self.download_image_thumbnail(object['image_thumbnail_url'],main_directory,dir_name,return_image_name,arguments['print_urls'],arguments['socket_timeout'],arguments['print_size'],arguments['no_download'],arguments['save_source'],object['image_source'],arguments['ignore_urls']) if not arguments["silent_mode"]: print(download_message_thumbnail) From 0d2bf8f17b5a8806d90df7258e7a172aa0cb7963 Mon Sep 17 00:00:00 2001 From: Hardik Vasa Date: Tue, 21 May 2019 11:24:10 -0700 Subject: [PATCH 83/83] minor changes after merging #213 doc updates for #213 minor other docs updates - linking doc pages doc update for #140 --- docs/_static/.DS_Store | Bin 0 -> 6148 bytes docs/arguments.rst | 8 ++++++++ docs/examples.rst | 14 ++++++++++++-- docs/index.rst | 2 ++ docs/installation.rst | 2 ++ docs/structure.rst | 4 ++++ docs/troubleshooting.rst | 17 +++++++++++++++++ docs/usage.rst | 4 ++++ .../google_images_download.py | 10 +++++----- setup.py | 2 +- 10 files changed, 55 insertions(+), 8 deletions(-) create mode 100644 docs/_static/.DS_Store diff --git a/docs/_static/.DS_Store b/docs/_static/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..12e84d138d67e0b8ba6bb3e74306fff687a7dbbe GIT binary patch literal 6148 zcmeHKJxc>o5S-N%0TEJKzS2stwmCv9to;L$#6k!hBxrq|KRcQIP&`fx8w;6*-Fa_s z=e^t%w^;zTeBIsx3jlMvBi=ns&7ZqZ?5r|Ir1OkDUa-Y84!D_Af1hyfC3?J(wIA^} zJmv7P>o@CNzh;&H#5?-+F^nsj6p#W^Knh3!De!v*y!X=P7m12eKnh5KPX+w@(CCg` z;gA@g4u%*3h%=_cxQbk-qG3l_lndj8aRuhWF z?aa3*hjodHQa}of6*$lB!u$U{{fGH~OwvvYNP&N)fX!Bqt0iBldh6unyw^7R6aB+j o8|4hqiiy#Rx$#zf^`)-)n$Nq!Au;I82c4)t0`__ + +Link to `Documentation Homepage `__ + +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | Argument | Short hand | Description | +===================+=============+===============================================================================================================================+ @@ -235,6 +239,10 @@ Input Arguments | | | | | | | This argument will override all the other print arguments (like print_urls, print_size, etc.) 
| +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ +| ignore_urls | iu | Skip downloading of images whose urls contain certain strings such as wikipedia.org | +| | | | +| | | This argument takes a delimited set of values e.g. wikipedia.org,wikimedia.org | ++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ | help | h | show the help message regarding the usage of the above arguments | +-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+ diff --git a/docs/examples.rst b/docs/examples.rst index 87f6e585..24b47b73 100644 --- a/docs/examples.rst +++ b/docs/examples.rst @@ -2,6 +2,12 @@ Examples ======== +Link to `GitHub repo `__ + +Link to `Documentation Homepage `__ + +Link to `Input arguments or parameters `__ + Config File Format ================== @@ -31,8 +37,8 @@ download images based on arguments passed. } -Command line examples -===================== +Code sample - Importing the library +=================================== - If you are calling this library from another python file, below is the sample code @@ -46,6 +52,10 @@ Command line examples paths = response.download(arguments) #passing the arguments to the function print(paths) #printing absolute paths of the downloaded images + +Command line examples +===================== + - If you are passing arguments from a config file, simply pass the config_file argument with name of your JSON file .. code-block:: bash diff --git a/docs/index.rst b/docs/index.rst index 6f32a432..3e276d00 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -2,6 +2,8 @@ Google Images Download ====================== +Link to `GitHub repo `__ + .. index:: Summary Summary diff --git a/docs/installation.rst b/docs/installation.rst index 0e1362f5..457f3ef0 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -2,6 +2,8 @@ Installation ============ +Link to `Documentation Homepage `__ + You can use **one of the below methods** to download and use this repository. Install using pip diff --git a/docs/structure.rst b/docs/structure.rst index 77e93faf..8e142fde 100644 --- a/docs/structure.rst +++ b/docs/structure.rst @@ -2,6 +2,10 @@ Workflow ======== +Link to `GitHub repo `__ + +Link to `Documentation Homepage `__ + Below diagram represents the algorithm logic to download images. .. figure:: http://www.zseries.in/flow-chart.png diff --git a/docs/troubleshooting.rst b/docs/troubleshooting.rst index 4e48e979..05e0aee6 100644 --- a/docs/troubleshooting.rst +++ b/docs/troubleshooting.rst @@ -2,6 +2,10 @@ Troubleshooting Errors/Issues ============================= +Link to `GitHub repo `__ + +Link to `Documentation Homepage `__ + SSL Errors ========== @@ -75,3 +79,16 @@ For **All the operating systems** you will have to use '--chromedriver' or '-cd' chromedriver that you have downloaded in your machine. If on any rare occasion the chromedriver does not work for you, try downgrading it to a lower version. + + +urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] +============================================== + +`Reference to this issue `__ + +Use the below command to install the SSL certificate on your machine. + +.. 
code-block:: bash + + cd /Applications/Python\ 3.7/ + ./Install\ Certificates.command diff --git a/docs/usage.rst b/docs/usage.rst index 38197628..a190167b 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -2,6 +2,10 @@ Usage ===== +Link to `GitHub repo `__ + +Link to `Documentation Homepage `__ + Using the library from Command Line Interface ============================================= diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py index 84b055a9..fd89a3a9 100755 --- a/google_images_download/google_images_download.py +++ b/google_images_download/google_images_download.py @@ -507,7 +507,7 @@ def create_directories(self,main_directory, dir_name,thumbnail,thumbnail_only): return - # Download Images + # Download Image thumbnails def download_image_thumbnail(self,image_url,main_directory,dir_name,return_image_name,print_urls,socket_timeout,print_size,no_download,save_source,img_src,ignore_urls): if print_urls or no_download: print("Image URL: " + image_url) @@ -576,14 +576,14 @@ def download_image_thumbnail(self,image_url,main_directory,dir_name,return_image # Download Images def download_image(self,image_url,image_format,main_directory,dir_name,count,print_urls,socket_timeout,prefix,print_size,no_numbering,no_download,save_source,img_src,silent_mode,thumbnail_only,format,ignore_urls): + if not silent_mode: + if print_urls or no_download: + print("Image URL: " + image_url) if ignore_urls: if any(url in image_url for url in ignore_urls.split(',')): - return "fail","Image ignored",None,None + return "fail", "Image ignored due to 'ignore url' parameter", None, image_url if thumbnail_only: return "success", "Skipping image download...", str(image_url[(image_url.rfind('/')) + 1:]), image_url - if not silent_mode: - if print_urls or no_download: - print("Image URL: " + image_url) if no_download: return "success","Printed url without downloading",None,image_url try: diff --git a/setup.py b/setup.py index 0a5d682b..e0d8c80c 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from codecs import open from os import path -__version__ = '2.7.0' +__version__ = '2.8.0' here = path.abspath(path.dirname(__file__))
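
As a closing note on the ``ignore_urls`` option introduced in the patches above (#213): the sketch below shows one plausible way to use it through the library interface. The keyword, limit, and ignored domains are illustrative values only, and the exact behaviour depends on the installed version.

.. code-block:: python

    from google_images_download import google_images_download

    response = google_images_download.googleimagesdownload()  # class instantiation

    # ignore_urls takes a comma-delimited string; any result whose URL contains
    # one of these substrings is skipped. Keyword, limit, and domains are
    # example values only.
    arguments = {
        "keywords": "polar bears",
        "limit": 10,
        "ignore_urls": "wikipedia.org,wikimedia.org",
        "print_urls": True,
    }
    paths = response.download(arguments)
    print(paths)  # absolute paths of the downloaded images

The equivalent command-line form would pass the same comma-delimited string via ``--ignore_urls`` (short hand ``-iu``), as described in the arguments table.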