diff --git a/.gitignore b/.gitignore
index dafb6cb1..48de0972 100644
--- a/.gitignore
+++ b/.gitignore
@@ -40,6 +40,15 @@ output/*/index.html
# Sphinx
docs/_build
+docs/.DS_Store
+docs/_static/*
# Cookiecutter
output/
+
+# Downloads
+downloads/
+
+# Logs
+logs/
+
diff --git a/LICENSE.md b/LICENSE.md
deleted file mode 100644
index 3361c0b2..00000000
--- a/LICENSE.md
+++ /dev/null
@@ -1,21 +0,0 @@
-The MIT License (MIT)
-
-Copyright (c) 2015-2018 Hardik Vasa
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
diff --git a/Licence.txt b/Licence.txt
index 3361c0b2..2aee4738 100644
--- a/Licence.txt
+++ b/Licence.txt
@@ -1,6 +1,6 @@
The MIT License (MIT)
-Copyright (c) 2015-2018 Hardik Vasa
+Copyright (c) 2015-2019 Hardik Vasa
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
diff --git a/README.md b/README.md
deleted file mode 100644
index 92ec9c52..00000000
--- a/README.md
+++ /dev/null
@@ -1,140 +0,0 @@
-# Google Images Download
-Python Script for 'searching' and 'downloading' hundreds of Google images to the local hard disk!
-
-## Summary
-This is a command line python program to search keywords/key-phrases on Google Images and then also optionally download one or more images to your computer. This is a small program which is ready-to-run, but still under development. Many more features will be added to it going forward.
-
-## Compatability
-This program is compatible with both the versions of python (2.x and 3.x). It is a download-and-run program with no changes to the file. You will just have to specify parameters through the command line.
-___
-
-## Installation
-**Using pip:**
-```
-$ pip install google_images_download
-```
-
-**Manually:**
-```
-$ git clone https://github.com/hardikvasa/google-images-download.git
-$ cd google-images-download && sudo python setup.py install
-```
-
-## Usage
-Go to the directory where you have the `google_images_download.py` file and type in the command:
-
-**Python3:** python3 google_images_download.py [Arguments...]
-
-**OR**
-
-**Python2:** python google_images_download.py [Arguments...]
-
-
-### Arguments
-
-| Argument | Short hand | Description |
-| --- | :---: | --- |
-|**keywords**| k | Denotes the keywords/key phrases you want to search for and the directory file name. Tips: * If you simply type the keyword, Google will best try to match it * If you want to search for exact phrase, you can wrap the keywords in double quotes ("") * If you want to search to contain either of the words provided, use **OR** between the words. * If you want to explicitly not want a specific word use a minus sign before the word (-)|
-|**suffix_keywords**| sk | Denotes additional words added after main keyword while making the search query. Useful when you have multiple suffix keywords for one keyword The final search query would be: |
-|**limit** | l |Denotes number of images that you want to download. |
-|**format** | f |Denotes the format/extension that you want to download. `Possible values: jpg, gif, png, bmp, svg, webp, ico`|
-|**color** | c |Denotes the color filter that you want to apply to the images. `Possible values: red, orange, yellow, green, teal, blue, purple, pink, white, gray, black, brown`|
-|**color_type** | ct |Denotes the color type you want to apply to the images. `Possible values: full-color, black-and-white, transparent`|
-|**usage_rights** | r |Denotes the usage rights/licence under which the image is classified. `Possible values: labled-for-reuse-with-modifications, labled-for-reuse, labled-for-noncommercial-reuse-with-modification, labled-for-nocommercial-reuse`|
-|**size** | s |Denotes the relative size of the image to be downloaded. `Possible values: large, medium, icon`|
-|**aspect_ratio** | a |Denotes the aspect ration of images to download. `Possible values: tall, square, wide, panoramic`|
-|**type** | t |Denotes the type of image to be downloaded. `Possible values: face,photo,clip-art,line-drawing,animated`|
-|**time** | w |Denotes the time the image was uploaded/indexed. `Possible values: past-24-hours, past-7-days`|
-|**delay** | d |Time to wait between downloading two images|
-|**url** | u |Allows you search by image. It downloads images from the google images link provided|
-|**single_image** | x |Allows you to download one image if the complete URL of the image is provided|
-|**output_directory** | o |Allows you specify the main directory name. If not specified, it will default to 'downloads'|
-|**similar_images** | si |Reverse Image Search. Searches and downloads images that are similar to the image link/url you provide.|
-|**specific_site** | ss |Allows you to download images with keywords only from a specific website/domain name you mention as indexed in Google Images.|
-|**print_urls** | p |Print the URLs of the images on the console. These image URLs can be used for debugging purposes|
-|**help** | h |show the help message regarding the usage of the above arguments|
-
-**Note:** If `single_image` or `url` parameter is not present, then keywords is a mandatory parameter. No other parameters are mandatory.
-
-## Examples
-* If you have python 2.x version installed
-
-`python google-images-download.py --keywords "Polar bears, baloons, Beaches" --limit 20`
-
-* If you have python 3.x version installed
-
-`python3 google-images-download.py --keywords "Polar bears, baloons, Beaches" --limit 20`
-
-* Using Suffix Keywords allows you to specify words after the main keywords. For example if the `keyword = car` and `suffix keyword = 'red,blue'` then it will first search for `car red` and then `car blue`
-
-`python3 google-images-download.py --k "car" -sk 'red,blue,white' -l 10`
-
-* To use the short hand command
-
-`python google-images-download.py -k "Polar bears, baloons, Beaches" -l 20`
-
-* To download images with specific image extension/format
-
-`python google-images-download.py --keywords "logo" --format svg`
-
-* To use color filters for the images
-
-`python google-images-download.py -k "playground" -l 20 -c red`
-
-* To use non-English keywords for image search
-
-`python google-images-download.py -k "北极熊" -l 5`
-
-* To download images from the google images link
-
-`python google-images-download.py -k "sample" -u `
-
-* To save images in specific main directory (instead of in 'downloads')
-
-`python google-images-download.py -k "boat" -o "boat_new"`
-
-* To download one single image with the image URL
-
-`python google-images-download.py --keywords "baloons" --single_image `
-
-* To download images with size and type constrains
-
-`python google-images-download.py --keywords "baloons" --size medium --type animated`
-
-* To download images with specific usage rights
-
-`python google-images-download.py --keywords "universe" --usage_rights labled-for-reuse`
-
-* To download images with specific color type
-
-`python google-images-download.py --keywords "flowers" --color_type black-and-white`
-
-* To download images with specific aspect ratio
-
-`python google-images-download.py --keywords "universe" --aspect_ratio panoramic`
-
-* To download images which are similar to the image in the image URL that you provided (Reverse Image search).
-
-`python3 pr.py -si -l 10`
-
-* To download images from specific website or domain name for a given keyword
-
-`python google-images-download.py --keywords "universe" --specific_site example.com`
-
-===> The images would be downloaded in their own sub-directories inside the main directory (either the one you provided or in 'downloads') in the same folder as the `google_images_download.py` file.
-
-
-___
-
-## SSL Errors
-If you do see SSL errors on Mac for Python 3 please go to Finder —> Applications —> Python 3 —> Click on the ‘Install Certificates.command’ and run the file.
-
-## Contribute
-Anyone is welcomed to contribute to this script. If you would like to make a change, open a pull request. For issues and discussion visit the [Issue Tracker](https://github.com/hardikvasa/google-images-download/issues).
-
-The aim of this repo is to keep it simple, stand-alone, backward compatible and 3rd party dependency proof.
-
-## Disclaimer
-This program lets you download tons of images from Google. Please do not download any image without violating its copyright terms. Google Images is a search engine that merely indexes images and allows you to find them. It does NOT produce its own images and, as such, it doesn't own copyright on any of them. The original creators of the images own the copyrights.
-
-Images published in the United States are automatically copyrighted by their owners, even if they do not explicitly carry a copyright warning. You may not reproduce copyright images without their owner's permission, except in "fair use" cases, or you could risk running into lawyer's warnings, cease-and-desist letters, and copyright suits. Please be very careful before its usage!
diff --git a/README.rst b/README.rst
new file mode 100644
index 00000000..b1e08f65
--- /dev/null
+++ b/README.rst
@@ -0,0 +1,29 @@
+Google Images Download
+######################
+
+Python Script for 'searching' and 'downloading' hundreds of Google images to the local hard disk!
+
+Documentation
+=============
+
+* `Documentation Homepage <https://google-images-download.readthedocs.io/en/latest/index.html>`__
+* `Installation <https://google-images-download.readthedocs.io/en/latest/installation.html>`__
+* `Input arguments <https://google-images-download.readthedocs.io/en/latest/arguments.html>`__
+* `Examples and Code Samples <https://google-images-download.readthedocs.io/en/latest/examples.html>`__
+
+
+Disclaimer
+==========
+
+This program lets you download tons of images from Google.
+Please do not download or use any image that violates its copyright terms.
+Google Images is a search engine that merely indexes images and allows you to find them.
+It does NOT produce its own images and, as such, it doesn't own copyright on any of them.
+The original creators of the images own the copyrights.
+
+Images published in the United States are automatically copyrighted by their owners,
+even if they do not explicitly carry a copyright warning.
+You may not reproduce copyright images without their owner's permission,
+except in "fair use" cases,
+or you could risk running into lawyer's warnings, cease-and-desist letters, and copyright suits.
+Please be very careful before its usage! Use this script/code only for educational purposes.
diff --git a/docs/.DS_Store b/docs/.DS_Store
new file mode 100644
index 00000000..b0b71537
Binary files /dev/null and b/docs/.DS_Store differ
diff --git a/docs/Makefile b/docs/Makefile
index 09806c60..298ea9e2 100644
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -1,230 +1,19 @@
-# Makefile for Sphinx documentation
+# Minimal makefile for Sphinx documentation
#
# You can set these variables from the command line.
SPHINXOPTS =
SPHINXBUILD = sphinx-build
-PAPER =
-BUILDDIR = build
+SOURCEDIR = .
+BUILDDIR = _build
-# User-friendly check for sphinx-build
-ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
- $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don\'t have Sphinx installed, grab it from http://sphinx-doc.org/)
-endif
-
-# Internal variables.
-PAPEROPT_a4 = -D latex_paper_size=a4
-PAPEROPT_letter = -D latex_paper_size=letter
-ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
-# the i18n builder cannot share the environment and doctrees with the others
-I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
-
-.PHONY: help
+# Put it first so that "make" without argument is like "make help".
help:
- @echo "Please use \`make ' where is one of"
- @echo " html to make standalone HTML files"
- @echo " dirhtml to make HTML files named index.html in directories"
- @echo " singlehtml to make a single large HTML file"
- @echo " pickle to make pickle files"
- @echo " json to make JSON files"
- @echo " htmlhelp to make HTML files and a HTML help project"
- @echo " qthelp to make HTML files and a qthelp project"
- @echo " applehelp to make an Apple Help Book"
- @echo " devhelp to make HTML files and a Devhelp project"
- @echo " epub to make an epub"
- @echo " epub3 to make an epub3"
- @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
- @echo " latexpdf to make LaTeX files and run them through pdflatex"
- @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
- @echo " text to make text files"
- @echo " man to make manual pages"
- @echo " texinfo to make Texinfo files"
- @echo " info to make Texinfo files and run them through makeinfo"
- @echo " gettext to make PO message catalogs"
- @echo " changes to make an overview of all changed/added/deprecated items"
- @echo " xml to make Docutils-native XML files"
- @echo " pseudoxml to make pseudoxml-XML files for display purposes"
- @echo " linkcheck to check all external links for integrity"
- @echo " doctest to run all doctests embedded in the documentation (if enabled)"
- @echo " coverage to run coverage check of the documentation (if enabled)"
- @echo " dummy to check syntax errors of document sources"
-
-.PHONY: clean
-clean:
- rm -rf $(BUILDDIR)/*
-
-.PHONY: html
-html:
- $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
- @echo
- @echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
-
-.PHONY: dirhtml
-dirhtml:
- $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
- @echo
- @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
-
-.PHONY: singlehtml
-singlehtml:
- $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
- @echo
- @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
-
-.PHONY: pickle
-pickle:
- $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
- @echo
- @echo "Build finished; now you can process the pickle files."
-
-.PHONY: json
-json:
- $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
- @echo
- @echo "Build finished; now you can process the JSON files."
-
-.PHONY: htmlhelp
-htmlhelp:
- $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
- @echo
- @echo "Build finished; now you can run HTML Help Workshop with the" \
- ".hhp project file in $(BUILDDIR)/htmlhelp."
-
-.PHONY: qthelp
-qthelp:
- $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
- @echo
- @echo "Build finished; now you can run "qcollectiongenerator" with the" \
- ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
- @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/twitterpandas.qhcp"
- @echo "To view the help file:"
- @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/twitterpandas.qhc"
-
-.PHONY: applehelp
-applehelp:
- $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp
- @echo
- @echo "Build finished. The help book is in $(BUILDDIR)/applehelp."
- @echo "N.B. You won't be able to view it unless you put it in" \
- "~/Library/Documentation/Help or install it in your application" \
- "bundle."
-
-.PHONY: devhelp
-devhelp:
- $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
- @echo
- @echo "Build finished."
- @echo "To view the help file:"
- @echo "# mkdir -p $$HOME/.local/share/devhelp/twitterpandas"
- @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/twitterpandas"
- @echo "# devhelp"
-
-.PHONY: epub
-epub:
- $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
- @echo
- @echo "Build finished. The epub file is in $(BUILDDIR)/epub."
-
-.PHONY: epub3
-epub3:
- $(SPHINXBUILD) -b epub3 $(ALLSPHINXOPTS) $(BUILDDIR)/epub3
- @echo
- @echo "Build finished. The epub3 file is in $(BUILDDIR)/epub3."
-
-.PHONY: latex
-latex:
- $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
- @echo
- @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
- @echo "Run \`make' in that directory to run these through (pdf)latex" \
- "(use \`make latexpdf' here to do that automatically)."
-
-.PHONY: latexpdf
-latexpdf:
- $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
- @echo "Running LaTeX files through pdflatex..."
- $(MAKE) -C $(BUILDDIR)/latex all-pdf
- @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
-
-.PHONY: latexpdfja
-latexpdfja:
- $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
- @echo "Running LaTeX files through platex and dvipdfmx..."
- $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
- @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
-
-.PHONY: text
-text:
- $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
- @echo
- @echo "Build finished. The text files are in $(BUILDDIR)/text."
-
-.PHONY: man
-man:
- $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
- @echo
- @echo "Build finished. The manual pages are in $(BUILDDIR)/man."
-
-.PHONY: texinfo
-texinfo:
- $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
- @echo
- @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
- @echo "Run \`make' in that directory to run these through makeinfo" \
- "(use \`make info' here to do that automatically)."
-
-.PHONY: info
-info:
- $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
- @echo "Running Texinfo files through makeinfo..."
- make -C $(BUILDDIR)/texinfo info
- @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
-
-.PHONY: gettext
-gettext:
- $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
- @echo
- @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
-
-.PHONY: changes
-changes:
- $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
- @echo
- @echo "The overview file is in $(BUILDDIR)/changes."
-
-.PHONY: linkcheck
-linkcheck:
- $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
- @echo
- @echo "Link check complete; look for any errors in the above output " \
- "or in $(BUILDDIR)/linkcheck/output.txt."
-
-.PHONY: doctest
-doctest:
- $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
- @echo "Testing of doctests in the sources finished, look at the " \
- "results in $(BUILDDIR)/doctest/output.txt."
-
-.PHONY: coverage
-coverage:
- $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage
- @echo "Testing of coverage in the sources finished, look at the " \
- "results in $(BUILDDIR)/coverage/python.txt."
-
-.PHONY: xml
-xml:
- $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
- @echo
- @echo "Build finished. The XML files are in $(BUILDDIR)/xml."
+ @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
-.PHONY: pseudoxml
-pseudoxml:
- $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
- @echo
- @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
+.PHONY: help Makefile
-.PHONY: dummy
-dummy:
- $(SPHINXBUILD) -b dummy $(ALLSPHINXOPTS) $(BUILDDIR)/dummy
- @echo
- @echo "Build finished. Dummy builder generates no files."
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+ @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
\ No newline at end of file
diff --git a/docs/_static/.DS_Store b/docs/_static/.DS_Store
new file mode 100644
index 00000000..12e84d13
Binary files /dev/null and b/docs/_static/.DS_Store differ
diff --git a/docs/_static/overrides.css b/docs/_static/overrides.css
new file mode 100644
index 00000000..68eadba4
--- /dev/null
+++ b/docs/_static/overrides.css
@@ -0,0 +1,7 @@
+table.docutils td, table.docutils th {
+ padding: 1px 8px 1px 5px;
+ border-top: 0;
+ border-left: 0;
+ border-right: 1px dotted #33B8FF;
+ border-bottom: 1px dotted #33B8FF;
+}
\ No newline at end of file
diff --git a/docs/arguments.rst b/docs/arguments.rst
new file mode 100644
index 00000000..e18035c9
--- /dev/null
+++ b/docs/arguments.rst
@@ -0,0 +1,249 @@
+===============
+Input Arguments
+===============
+
+Link to `GitHub repo <https://github.com/hardikvasa/google-images-download>`__
+
+Link to `Documentation Homepage <https://google-images-download.readthedocs.io/en/latest/index.html>`__
+
++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+
+| Argument | Short hand | Description |
++===================+=============+===============================================================================================================================+
+| config_file | cf | You can pass the arguments inside a config file. This is an alternative to passing arguments on the command line directly. |
+| | | |
+| | | Please refer to the |
+| | | `config file format `__ below |
+| | | |
+| | | * If 'config_file' argument is present, the program will use the config file and command line arguments will be discarded |
+| | | * Config file can only be in **JSON** format |
+| | | * Please refrain from passing invalid arguments from config file. Refer to the below arguments list |
++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+
+| keywords | k | Denotes the keywords/key phrases you want to search for. For more than one keywords, wrap it in single quotes. |
+| | | |
+| | | Tips: |
+| | | |
+| | | * If you simply type the keyword, Google will best try to match it |
+| | | * If you want to search for exact phrase, you can wrap the keywords in double quotes ("") |
+| | | * If you want to search to contain either of the words provided, use **OR** between the words. |
+| | | * If you want to explicitly not want a specific word use a minus sign before the word (-) |
++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+
+| keywords_from_file| kf | Denotes the file name from where you would want to import the keywords. |
+| | | |
+| | | Add one keyword per line. Blank/Empty lines are truncated automatically. |
+| | | |
+| | | Only file types '.txt' or '.csv' are allowed. |
++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+
+| prefix_keywords | pk | Denotes additional words added before main keyword while making the search query. |
+| | | |
+| | | The final search query would be: |
+| | | |
+| | | So, for example, if the keyword is 'car' and prefix_keyword is 'red,yellow,blue', it will search and download images for |
+| | | 'red car', 'yellow car' and 'blue car' individually |
++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+
+| suffix_keywords | sk | Denotes additional words added after main keyword while making the search query. |
+| | | |
+| | | The final search query would be: |
+| | | |
+| | | So, for example, if the keyword is 'car' and suffix_keyword is 'red,yellow,blue', it will search and download images for |
+| | | 'car red', 'car yellow' and 'car blue' individually |
++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+
+| limit | l | Denotes number of images that you want to download. |
+| | | |
+| | | You can specify any integer value here. It will try and get all the images that it finds in the google image search page. |
+| | | |
+| | | If this value is not specified, it defaults to 100. |
+| | | |
+| | | **Note**: In case of occasional errors while downloading images, you could get less than 100 (if the limit is set to 100) |
++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+
+| related_images | ri | This argument downloads a ton of images related to the keyword you provided. |
+| | | |
+| | | Google Images page returns list of related keywords to the keyword you have mentioned in the query. This tool downloads |
+| | | images from each of those related keywords based on the limit you have mentioned in your query |
+| | | |
+| | | This argument does not take any value. Just add '--related_images' or '-ri' in your query. |
+| | | |
+| | | **Note:** This argument can download hundreds or thousands of additional images so please use this carefully. |
++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+
+| format | f | Denotes the format/extension of the image that you want to download. |
+| | | |
+| | | `Possible values: jpg, gif, png, bmp, svg, webp, ico, raw` |
++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+
+| color | co | Denotes the color filter that you want to apply to the images. |
+| | | |
+| | | `Possible values: red, orange, yellow, green, teal, blue, purple, pink, white, gray, black, brown` |
++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+
+| color_type | ct | Denotes the color type you want to apply to the images. |
+| | | |
+| | | `Possible values: full-color, black-and-white, transparent` |
++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+
+| usage_rights | r | Denotes the usage rights/licence under which the image is classified. |
+| | | |
+| | | `Possible values:` |
+| | | |
+| | | * `labeled-for-reuse-with-modifications`, |
+| | | * `labeled-for-reuse`, |
+| | | * `labeled-for-noncommercial-reuse-with-modification`, |
+| | | * `labeled-for-nocommercial-reuse` |
++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+
+| size | s | Denotes the relative size of the image to be downloaded. |
+| | | |
+| | | `Possible values: large, medium, icon, >400*300, >640*480, >800*600, >1024*768, >2MP, >4MP, >6MP, >8MP, >10MP, |
+| | | >12MP, >15MP, >20MP, >40MP, >70MP` |
++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+
+| exact_size | es | You can specify the exact size/resolution of the images |
+| | | |
+|                   |             | The value of this argument can be specified as ```` where the first integer stands for width of the image                     |
+| | | and the second integer stands for the height of the image. For example, ``-es 1024,786`` |
+| | | |
+| | | **Note**: You cannot specify both 'size' and 'exact_size' arguments in the same query. You can only give one of them. |
++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+
+| aspect_ratio | a | Denotes the aspect ratio of images to download. |
+| | | |
+| | | `Possible values: tall, square, wide, panoramic` |
++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+
+| type | t | Denotes the type of image to be downloaded. |
+| | | |
+| | | `Possible values: face, photo, clip-art, line-drawing, animated` |
++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+
+| time | w | Denotes the time the image was uploaded/indexed. |
+| | | |
+| | | `Possible values: past-24-hours, past-7-days, past-month, past-year` |
++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+
+| time_range | wr | Denotes the time range for which you want to search the images |
+| | | |
+| | | The value of this parameter should be in the following format '{"time_min":"MM/DD/YYYY","time_max":"MM/DD/YYYY"}' |
++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+
+| delay | d | Time to wait between downloading two images |
+| | | |
+| | | Time is to be specified in seconds. But you can have sub-second times by using decimal points. |
++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+
+| url | u | Allows you search by image when you have the URL from the Google Images page. |
+| | | It downloads images from the google images link provided |
+| | | |
+| | | If you are searching an image on the browser google images page, simply grab the browser URL and paste it in this parameter |
+| | | It will download all the images seen on that page. |
++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+
+| single_image | x | Allows you to download one image if the complete (absolute) URL of the image is provided |
++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+
+| output_directory | o | Allows you specify the main directory name in which the images are downloaded. |
+| | | |
+| | | If not specified, it will default to 'downloads' directory. This directory is located in the path from where you run this code|
+| | | |
+| | | The directory structure would look like: ```` |
++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+
+| image_directory | i | This lets you specify a directory inside of the main directory (output_directory) in which the images will be saved |
+| | | |
+| | | If not specified, it will default to the name of the keyword. |
+| | | |
+| | | The directory structure would look like: ```` |
++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+
+| no_directory | n | This option allows you download images directly in the main directory (output_directory) without an image_directory |
+| | | |
+| | | The directory structure would look like: ```` |
++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+
+| proxy | px | Allows you to specify proxy server setting for all your requests |
+| | | |
+| | | You can specify the proxy settings in 'IP:Port' format |
++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+
+| similar_images | si | Reverse Image Search or 'Search by Image' as it is referred to on Google. |
+| | | |
+| | | Searches and downloads images that are similar to the absolute image link/url you provide. |
++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+
+| specific_site | ss | Allows you to download images with keywords only from a specific website/domain name you mention. |
++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+
+| print_urls | p | Print the URLs of the images on the console. These image URLs can be used for debugging purposes |
+| | | |
+| | | This argument does not take any value. Just add '--print_urls' or '-p' in your query. |
++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+
+| print_size | ps | Prints the size of the images on the console |
+| | | |
+|                   |             | The size denotes the actual size of the image and not the size of the image on disk                                           |
+| | | |
+| | | This argument does not take any value. Just add '--print_size' or '-ps' in your query. |
++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+
+| print_paths | pp | Prints the list of all the absolute paths of the downloaded images |
+| | | |
+| | | When calling the script from another python file, this list will be saved in a variable (as shown in the example below) |
+| | | |
+| | | This argument also allows you to print the list on the console |
++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+
+| metadata          | m           | Prints the metadata of the image on the console.                                                                              |
+| | | |
+| | | This includes image size, origin, image attributes, description, image URL, etc. |
+| | | |
+| | | This argument does not take any value. Just add '--metadata' or '-m' in your query. |
++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+
+| extract_metadata | e | This option allows you to save metadata of all the downloaded images in a JSON file. |
+| | | |
+| | | This file can be found in the ``logs/`` directory. The name of the file would be same as the keyword name |
+| | | |
+| | | This argument does not take any value. Just add '--extract_metadata' or '-e' in your query. |
++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+
+| socket_timeout | st | Allows you to specify the time to wait for socket connection. |
+| | | |
+| | | You could specify a higher timeout time for slow internet connection. The default value is 10 seconds. |
++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+
+| thumbnail | th | Downloads image thumbnails corresponding to each image downloaded. |
+| | | |
+| | | Thumbnails are saved in their own sub-directories inside of the main directory. |
+| | | |
+| | | This argument does not take any value. Just add '--thumbnail' or '-th' in your query. |
++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+
+| thumbnail_only | tho | Downloads only thumbnails without downloading actual size images |
+| | | |
+| | | Thumbnails are saved in their own sub-directories inside of the main directory. |
+| | | |
+| | | This argument does not take any value. Just add '--thumbnail_only' or '-tho' in your query. |
++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+
+| language | la | Defines the language filter. The search results are automatically returned in that language |
+| | | |
+|                   |             | `Possible Values: Arabic, Chinese (Simplified), Chinese (Traditional), Czech, Danish, Dutch, English, Estonian, Finnish,      |
+|                   |             | French, German, Greek, Hebrew, Hungarian, Icelandic, Italian, Japanese, Korean, Latvian, Lithuanian, Norwegian, Portuguese,   |
+| | | Polish, Romanian, Russian, Spanish, Swedish, Turkish` |
++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+
+| prefix | pr | A word that you would want to prefix in front of actual image name. |
+| | | |
+| | | This feature can be used to rename files for image identification purpose. |
++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+
+| chromedriver | cd | With this argument you can pass the path to the 'chromedriver'. |
+| | | |
+| | | The path looks like this: "path/to/chromedriver". In windows it will be "C:\\path\\to\\chromedriver.exe" |
++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+
+| safe_search | sa | Searches for images with the Safe Search filter On |
+| | | |
+| | | And this filter will be Off by default if you do not specify the safe_search argument |
+| | | |
+| | | This argument does not take any value. Just add '--safe_search' or '-sa' in your query. |
++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+
+| no_numbering | nn | When you specify this argument, the script does not add ordered numbering as prefix to the images it downloads |
+| | | |
+| | | If this argument is not specified, the images are numbered in order in which they are downloaded |
+| | | |
+| | | This argument does not take any value. Just add '--no_numbering' or '-nn' in your query. |
++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+
+| offset | of | When you specify this argument, it will skip the offset number of links before it starts downloading images |
+| | | |
+|                   |             | If this argument is not specified, the script will start downloading from the first link until the limit is reached           |
+| | | |
+| | | This argument takes integer. Make sure the value of this argument is less than the value of limit |
++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+
+| save_source | is | Creates a text file with list of downloaded images along with their source page paths. |
+| | | |
+| | | This argument takes a string, name of the text file. |
++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+
+| no_download | nd | Print the URLs on the console without downloading images or thumbnails. These image URLs can be used for other purposes |
+| | | |
+| | | This argument does not take any value. Just add '--no-download' or '-nd' in your query. |
++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+
+| silent_mode | sil | Remains silent. Does not print notification messages on the terminal/command prompt. |
+| | | |
+| | | This argument will override all the other print arguments (like print_urls, print_size, etc.) |
++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+
+| ignore_urls | iu | Skip downloading of images whose urls contain certain strings such as wikipedia.org |
+| | | |
+| | | This argument takes a delimited set of values e.g. wikipedia.org,wikimedia.org |
++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+
+| help | h | show the help message regarding the usage of the above arguments |
++-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+
+
+**Note:** If ``single_image`` or ``url`` parameter is not present, then keywords is a mandatory parameter. No other parameters are mandatory.
diff --git a/docs/conf.py b/docs/conf.py
new file mode 100644
index 00000000..339dc57f
--- /dev/null
+++ b/docs/conf.py
@@ -0,0 +1,69 @@
+# Configuration file for the Sphinx documentation builder.
+#
+# This file only contains a selection of the most common options. For a full
+# list see the documentation:
+# http://www.sphinx-doc.org/en/master/config
+
+# -- Path setup --------------------------------------------------------------
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#
+# import os
+# import sys
+# sys.path.insert(0, os.path.abspath('.'))
+version = '1.0.1'
+
+source_suffix = '.rst'
+master_doc = 'index'
+
+html_static_path = ['_static']
+
+def setup(app):
+ app.add_stylesheet('overrides.css') # may also be an URL
+
+html_context = {
+ "display_github": False, # Add 'Edit on Github' link instead of 'View page source'
+ "last_updated": True,
+ "commit": False,
+ }
+
+# -- Project information -----------------------------------------------------
+
+project = 'Google Images Download'
+copyright = '2019, Hardik Vasa'
+author = 'Hardik Vasa'
+
+
+# -- General configuration ---------------------------------------------------
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+extensions = [
+]
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+# This pattern also affects html_static_path and html_extra_path.
+exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
+
+
+# -- Options for HTML output -------------------------------------------------
+
+# The theme to use for HTML and HTML Help pages. See the documentation for
+# a list of builtin themes.
+#
+html_theme = 'bizstyle'
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ['_static']
+
+html_sidebars = { '**': ['globaltoc.html', 'relations.html', 'searchbox.html'] }
+
diff --git a/docs/examples.rst b/docs/examples.rst
new file mode 100644
index 00000000..24b47b73
--- /dev/null
+++ b/docs/examples.rst
@@ -0,0 +1,179 @@
+========
+Examples
+========
+
+Link to `GitHub repo <https://github.com/hardikvasa/google-images-download>`__
+
+Link to `Documentation Homepage <https://google-images-download.readthedocs.io/en/latest/index.html>`__
+
+Link to `Input arguments or parameters <https://google-images-download.readthedocs.io/en/latest/arguments.html>`__
+
+Config File Format
+==================
+
+You can either pass the arguments directly from the command as in the examples below or you can pass it through a config file. Below is a sample of how a config
+file looks.
+
+You can pass more than one record through a config file. The below sample consists of two sets of records. The code will iterate through each of the records and
+download images based on arguments passed.
+
+.. code:: json
+
+ {
+ "Records": [
+ {
+ "keywords": "apple",
+ "limit": 5,
+ "color": "green",
+ "print_urls": true
+ },
+ {
+ "keywords": "universe",
+ "limit": 15,
+ "size": "large",
+ "print_urls": true
+ }
+ ]
+ }
+
+
+Code sample - Importing the library
+===================================
+
+- If you are calling this library from another python file, below is the sample code
+
+.. code-block:: python
+
+ from google_images_download import google_images_download #importing the library
+
+ response = google_images_download.googleimagesdownload() #class instantiation
+
+ arguments = {"keywords":"Polar bears,baloons,Beaches","limit":20,"print_urls":True} #creating list of arguments
+ paths = response.download(arguments) #passing the arguments to the function
+ print(paths) #printing absolute paths of the downloaded images
+
+
+Command line examples
+=====================
+
+- If you are passing arguments from a config file, simply pass the config_file argument with name of your JSON file
+
+.. code-block:: bash
+
+ $ googleimagesdownload -cf example.json
+
+- Simple example of using keywords and limit arguments
+
+.. code-block:: bash
+
+ $ googleimagesdownload --keywords "Polar bears, baloons, Beaches" --limit 20
+
+- Using Suffix Keywords allows you to specify words after the main
+ keywords. For example if the ``keyword = car`` and
+ ``suffix keyword = 'red,blue'`` then it will first search for
+ ``car red`` and then ``car blue``
+
+.. code-block:: bash
+
+ $ googleimagesdownload --k "car" -sk 'red,blue,white' -l 10
+
+- To use the short hand command
+
+.. code-block:: bash
+
+ $ googleimagesdownload -k "Polar bears, baloons, Beaches" -l 20
+
+- To download images with specific image extension/format
+
+.. code-block:: bash
+
+ $ googleimagesdownload --keywords "logo" --format svg
+
+- To use color filters for the images
+
+.. code-block:: bash
+
+ $ googleimagesdownload -k "playground" -l 20 -co red
+
+- To use non-English keywords for image search
+
+.. code-block:: bash
+
+ $ googleimagesdownload -k "北极熊" -l 5
+
+- To download images from the google images link
+
+.. code-block:: bash
+
+ $ googleimagesdownload -k "sample" -u
+
+- To save images in specific main directory (instead of in 'downloads')
+
+.. code-block:: bash
+
+ $ googleimagesdownload -k "boat" -o "boat_new"
+
+- To download one single image with the image URL
+
+.. code-block:: bash
+
+    $ googleimagesdownload --keywords "baloons" --single_image <URL of the image>
+
+- To download images with size and type constraints
+
+.. code-block:: bash
+
+ $ googleimagesdownload --keywords "baloons" --size medium --type animated
+
+- To download images with specific usage rights
+
+.. code-block:: bash
+
+ $ googleimagesdownload --keywords "universe" --usage_rights labeled-for-reuse
+
+- To download images with specific color type
+
+.. code-block:: bash
+
+ $ googleimagesdownload --keywords "flowers" --color_type black-and-white
+
+- To download images with specific aspect ratio
+
+.. code-block:: bash
+
+ $ googleimagesdownload --keywords "universe" --aspect_ratio panoramic
+
+- To download images which are similar to the image in the image URL that you provided (Reverse Image search).
+
+.. code-block:: bash
+
+    $ googleimagesdownload -si <image url> -l 10
+
+- To download images from specific website or domain name for a given keyword
+
+.. code-block:: bash
+
+ $ googleimagesdownload --keywords "universe" --specific_site example.com
+
+===> The images would be downloaded in their own sub-directories inside the main directory
+(either the one you provided or in 'downloads') in the same folder you are in.
+
+
+Library extensions
+==================
+
+The downloading algorithm does a good job of keeping out corrupt images. However it is not ideal. There are still some chances of getting a one-off corrupt image that cannot be used for processing. Below script will help clean those corrupt image files. This script was ideated by @devajith in `Issue 81 <https://github.com/hardikvasa/google-images-download/issues/81>`__.
+
+.. code:: python
+
+ import os
+ from PIL import Image
+
+ img_dir = r"path/to/downloads/directory"
+ for filename in os.listdir(img_dir):
+ try :
+ with Image.open(img_dir + "/" + filename) as im:
+ print('ok')
+ except :
+ print(img_dir + "/" + filename)
+ os.remove(img_dir + "/" + filename)
diff --git a/docs/index.rst b/docs/index.rst
new file mode 100644
index 00000000..3e276d00
--- /dev/null
+++ b/docs/index.rst
@@ -0,0 +1,133 @@
+======================
+Google Images Download
+======================
+
+Link to `GitHub repo <https://github.com/hardikvasa/google-images-download>`__
+
+.. index:: Summary
+
+Summary
+=======
+
+This is a command line python program to search keywords/key-phrases on Google Images
+and optionally download images to your computer. You can also invoke this script from
+another python file.
+
+This is a small and ready-to-run program. No dependencies are required to be installed
+if you would only want to download up to 100 images per keyword. If you would want **more than 100
+images** per keyword, then you would need to install ``Selenium`` library along with ``chromedriver``.
+Detailed instructions in the troubleshooting section.
+
+
+.. index:: Compatibility
+
+Compatibility
+=============
+
+This program is compatible with both the versions of python - 2.x and 3.x (recommended).
+It is a download-and-run program with no changes to the file.
+You will just have to specify parameters through the command line.
+
+.. index:: Installation
+
+Installation
+============
+
+The guide provides detailed instructions on how to install the library.
+
+.. toctree::
+ :maxdepth: 3
+
+ installation
+
+.. index:: Usage
+
+Usage
+=====
+
+The following section provides details on using the library - from CLI or by standard imports.
+
+.. toctree::
+ :maxdepth: 3
+
+ usage
+
+.. index:: Arguments
+
+Arguments
+=========
+
+This section provides all the arguments/parameters/options you can provide to this library.
+
+.. toctree::
+ :maxdepth: 3
+
+ arguments
+
+.. index:: Examples
+
+Examples
+========
+
+Many examples have been provided to help new users quickly ramp up the usage.
+
+.. toctree::
+ :maxdepth: 3
+
+ examples
+
+.. index:: Troubleshooting
+
+Troubleshooting
+===============
+
+This section provides a troubleshooting guide for commonly seen issues.
+
+.. toctree::
+ :maxdepth: 2
+
+ troubleshooting
+
+.. index:: Workflow
+
+Workflow
+========
+
+Workflow showcases the algorithm used within this module to download the images.
+
+.. toctree::
+ :maxdepth: 2
+
+ structure
+
+.. index:: Contribute
+
+Contribute
+==========
+
+Anyone is welcome to contribute to this script.
+If you would like to make a change, open a pull request.
+For issues and discussion visit the
+`Issue Tracker <https://github.com/hardikvasa/google-images-download/issues>`__.
+
+The aim of this repo is to keep it simple, stand-alone, backward compatible and 3rd party dependency proof.
+
+.. index:: Disclaimer
+
+Disclaimer
+==========
+
+.. warning::
+
+ This program lets you download tons of images from Google.
+ Please do not download or use any image that violates its copyright terms.
+ Google Images is a search engine that merely indexes images and allows you to find them.
+ It does NOT produce its own images and, as such, it doesn't own copyright on any of them.
+ The original creators of the images own the copyrights.
+
+ Images published in the United States are automatically copyrighted by their owners,
+ even if they do not explicitly carry a copyright warning.
+ You may not reproduce copyright images without their owner's permission,
+ except in "fair use" cases,
+ or you could risk running into lawyer's warnings, cease-and-desist letters, and copyright suits.
+ Please be very careful before its usage!
diff --git a/docs/installation.rst b/docs/installation.rst
new file mode 100644
index 00000000..457f3ef0
--- /dev/null
+++ b/docs/installation.rst
@@ -0,0 +1,29 @@
+============
+Installation
+============
+
+Link to `Documentation Homepage <https://google-images-download.readthedocs.io/en/latest/index.html>`__
+
+You can use **one of the below methods** to download and use this repository.
+
+Install using pip
+-----------------
+
+.. code-block:: bash
+
+ $ pip install google_images_download
+
+
+Manually install using CLI
+--------------------------
+
+.. code-block:: bash
+
+ $ git clone https://github.com/hardikvasa/google-images-download.git
+ $ cd google-images-download && sudo python setup.py install
+
+
+Manually install using UI
+-------------------------
+
+Go to the `repo on github <https://github.com/hardikvasa/google-images-download>`__ ==> Click on 'Clone or Download' ==> Click on 'Download ZIP' and save it on your local disk.
\ No newline at end of file
diff --git a/docs/make.bat b/docs/make.bat
index 509abe3c..27f573b8 100644
--- a/docs/make.bat
+++ b/docs/make.bat
@@ -1,281 +1,35 @@
-@ECHO OFF
-
-REM Command file for Sphinx documentation
-
-if "%SPHINXBUILD%" == "" (
- set SPHINXBUILD=sphinx-build
-)
-set BUILDDIR=build
-set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% source
-set I18NSPHINXOPTS=%SPHINXOPTS% source
-if NOT "%PAPER%" == "" (
- set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
- set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS%
-)
-
-if "%1" == "" goto help
-
-if "%1" == "help" (
- :help
- echo.Please use `make ^` where ^ is one of
- echo. html to make standalone HTML files
- echo. dirhtml to make HTML files named index.html in directories
- echo. singlehtml to make a single large HTML file
- echo. pickle to make pickle files
- echo. json to make JSON files
- echo. htmlhelp to make HTML files and a HTML help project
- echo. qthelp to make HTML files and a qthelp project
- echo. devhelp to make HTML files and a Devhelp project
- echo. epub to make an epub
- echo. epub3 to make an epub3
- echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter
- echo. text to make text files
- echo. man to make manual pages
- echo. texinfo to make Texinfo files
- echo. gettext to make PO message catalogs
- echo. changes to make an overview over all changed/added/deprecated items
- echo. xml to make Docutils-native XML files
- echo. pseudoxml to make pseudoxml-XML files for display purposes
- echo. linkcheck to check all external links for integrity
- echo. doctest to run all doctests embedded in the documentation if enabled
- echo. coverage to run coverage check of the documentation if enabled
- echo. dummy to check syntax errors of document sources
- goto end
-)
-
-if "%1" == "clean" (
- for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i
- del /q /s %BUILDDIR%\*
- goto end
-)
-
-
-REM Check if sphinx-build is available and fallback to Python version if any
-%SPHINXBUILD% 1>NUL 2>NUL
-if errorlevel 9009 goto sphinx_python
-goto sphinx_ok
-
-:sphinx_python
-
-set SPHINXBUILD=python -m sphinx.__init__
-%SPHINXBUILD% 2> nul
-if errorlevel 9009 (
- echo.
- echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
- echo.installed, then set the SPHINXBUILD environment variable to point
- echo.to the full path of the 'sphinx-build' executable. Alternatively you
- echo.may add the Sphinx directory to PATH.
- echo.
- echo.If you don't have Sphinx installed, grab it from
- echo.http://sphinx-doc.org/
- exit /b 1
-)
-
-:sphinx_ok
-
-
-if "%1" == "html" (
- %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html
- if errorlevel 1 exit /b 1
- echo.
- echo.Build finished. The HTML pages are in %BUILDDIR%/html.
- goto end
-)
-
-if "%1" == "dirhtml" (
- %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml
- if errorlevel 1 exit /b 1
- echo.
- echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.
- goto end
-)
-
-if "%1" == "singlehtml" (
- %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml
- if errorlevel 1 exit /b 1
- echo.
- echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml.
- goto end
-)
-
-if "%1" == "pickle" (
- %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle
- if errorlevel 1 exit /b 1
- echo.
- echo.Build finished; now you can process the pickle files.
- goto end
-)
-
-if "%1" == "json" (
- %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json
- if errorlevel 1 exit /b 1
- echo.
- echo.Build finished; now you can process the JSON files.
- goto end
-)
-
-if "%1" == "htmlhelp" (
- %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp
- if errorlevel 1 exit /b 1
- echo.
- echo.Build finished; now you can run HTML Help Workshop with the ^
-.hhp project file in %BUILDDIR%/htmlhelp.
- goto end
-)
-
-if "%1" == "qthelp" (
- %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp
- if errorlevel 1 exit /b 1
- echo.
- echo.Build finished; now you can run "qcollectiongenerator" with the ^
-.qhcp project file in %BUILDDIR%/qthelp, like this:
- echo.^> qcollectiongenerator %BUILDDIR%\qthelp\twitterpandas.qhcp
- echo.To view the help file:
- echo.^> assistant -collectionFile %BUILDDIR%\qthelp\twitterpandas.ghc
- goto end
-)
-
-if "%1" == "devhelp" (
- %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp
- if errorlevel 1 exit /b 1
- echo.
- echo.Build finished.
- goto end
-)
-
-if "%1" == "epub" (
- %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub
- if errorlevel 1 exit /b 1
- echo.
- echo.Build finished. The epub file is in %BUILDDIR%/epub.
- goto end
-)
-
-if "%1" == "epub3" (
- %SPHINXBUILD% -b epub3 %ALLSPHINXOPTS% %BUILDDIR%/epub3
- if errorlevel 1 exit /b 1
- echo.
- echo.Build finished. The epub3 file is in %BUILDDIR%/epub3.
- goto end
-)
-
-if "%1" == "latex" (
- %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
- if errorlevel 1 exit /b 1
- echo.
- echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
- goto end
-)
-
-if "%1" == "latexpdf" (
- %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
- cd %BUILDDIR%/latex
- make all-pdf
- cd %~dp0
- echo.
- echo.Build finished; the PDF files are in %BUILDDIR%/latex.
- goto end
-)
-
-if "%1" == "latexpdfja" (
- %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
- cd %BUILDDIR%/latex
- make all-pdf-ja
- cd %~dp0
- echo.
- echo.Build finished; the PDF files are in %BUILDDIR%/latex.
- goto end
-)
-
-if "%1" == "text" (
- %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text
- if errorlevel 1 exit /b 1
- echo.
- echo.Build finished. The text files are in %BUILDDIR%/text.
- goto end
-)
-
-if "%1" == "man" (
- %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man
- if errorlevel 1 exit /b 1
- echo.
- echo.Build finished. The manual pages are in %BUILDDIR%/man.
- goto end
-)
-
-if "%1" == "texinfo" (
- %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo
- if errorlevel 1 exit /b 1
- echo.
- echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo.
- goto end
-)
-
-if "%1" == "gettext" (
- %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale
- if errorlevel 1 exit /b 1
- echo.
- echo.Build finished. The message catalogs are in %BUILDDIR%/locale.
- goto end
-)
-
-if "%1" == "changes" (
- %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes
- if errorlevel 1 exit /b 1
- echo.
- echo.The overview file is in %BUILDDIR%/changes.
- goto end
-)
-
-if "%1" == "linkcheck" (
- %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck
- if errorlevel 1 exit /b 1
- echo.
- echo.Link check complete; look for any errors in the above output ^
-or in %BUILDDIR%/linkcheck/output.txt.
- goto end
-)
-
-if "%1" == "doctest" (
- %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest
- if errorlevel 1 exit /b 1
- echo.
- echo.Testing of doctests in the sources finished, look at the ^
-results in %BUILDDIR%/doctest/output.txt.
- goto end
-)
-
-if "%1" == "coverage" (
- %SPHINXBUILD% -b coverage %ALLSPHINXOPTS% %BUILDDIR%/coverage
- if errorlevel 1 exit /b 1
- echo.
- echo.Testing of coverage in the sources finished, look at the ^
-results in %BUILDDIR%/coverage/python.txt.
- goto end
-)
-
-if "%1" == "xml" (
- %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml
- if errorlevel 1 exit /b 1
- echo.
- echo.Build finished. The XML files are in %BUILDDIR%/xml.
- goto end
-)
-
-if "%1" == "pseudoxml" (
- %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml
- if errorlevel 1 exit /b 1
- echo.
- echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml.
- goto end
-)
-
-if "%1" == "dummy" (
- %SPHINXBUILD% -b dummy %ALLSPHINXOPTS% %BUILDDIR%/dummy
- if errorlevel 1 exit /b 1
- echo.
- echo.Build finished. Dummy builder generates no files.
- goto end
-)
-
-:end
+@ECHO OFF
+
+pushd %~dp0
+
+REM Command file for Sphinx documentation
+
+if "%SPHINXBUILD%" == "" (
+ set SPHINXBUILD=sphinx-build
+)
+set SOURCEDIR=.
+set BUILDDIR=_build
+
+if "%1" == "" goto help
+
+%SPHINXBUILD% >NUL 2>NUL
+if errorlevel 9009 (
+ echo.
+ echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
+ echo.installed, then set the SPHINXBUILD environment variable to point
+ echo.to the full path of the 'sphinx-build' executable. Alternatively you
+ echo.may add the Sphinx directory to PATH.
+ echo.
+ echo.If you don't have Sphinx installed, grab it from
+ echo.http://sphinx-doc.org/
+ exit /b 1
+)
+
+%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
+goto end
+
+:help
+%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
+
+:end
+popd
diff --git a/docs/source/conf.py b/docs/source/conf.py
deleted file mode 100644
index 9d43f086..00000000
--- a/docs/source/conf.py
+++ /dev/null
@@ -1,289 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-#
-# google-images-download documentation build configuration file, created by
-# cookiecutter pipproject
-#
-# This file is execfile()d with the current directory set to its
-# containing dir.
-#
-# Note that not all possible configuration values are present in this
-# autogenerated file.
-#
-# All configuration values have a default; values that are commented out
-# serve to show the default.
-
-import sys
-import os
-
-# If extensions (or modules to document with autodoc) are in another directory,
-# add these directories to sys.path here. If the directory is relative to the
-# documentation root, use os.path.abspath to make it absolute, like shown here.
-sys.path.insert(0, os.path.abspath('../..'))
-
-# -- General configuration ------------------------------------------------
-
-# If your documentation needs a minimal Sphinx version, state it here.
-#needs_sphinx = '1.0'
-
-# Add any Sphinx extension module names here, as strings. They can be
-# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
-# ones.
-extensions = [
- 'sphinx.ext.autodoc',
-]
-
-# Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
-
-# The suffix(es) of source filenames.
-# You can specify multiple suffix as a list of string:
-# source_suffix = ['.rst', '.md']
-source_suffix = '.rst'
-
-# The encoding of source files.
-#source_encoding = 'utf-8-sig'
-
-# The master toctree document.
-master_doc = 'index'
-
-# General information about the project.
-project = 'google-images-download'
-copyright = '2016, Hardik Vasa'
-author = 'Hardik Vasa'
-
-# The version info for the project you're documenting, acts as replacement for
-# |version| and |release|, also used in various other places throughout the
-# built documents.
-#
-# The short X.Y version.
-version = '1.0.0'
-# The full version, including alpha/beta/rc tags.
-release = '1.0.0'
-
-# The language for content autogenerated by Sphinx. Refer to documentation
-# for a list of supported languages.
-#
-# This is also used if you do content translation via gettext catalogs.
-# Usually you set "language" from the command line for these cases.
-language = None
-
-# There are two options for replacing |today|: either, you set today to some
-# non-false value, then it is used:
-#today = ''
-# Else, today_fmt is used as the format for a strftime call.
-#today_fmt = '%B %d, %Y'
-
-# List of patterns, relative to source directory, that match files and
-# directories to ignore when looking for source files.
-# This patterns also effect to html_static_path and html_extra_path
-exclude_patterns = []
-
-# The reST default role (used for this markup: `text`) to use for all
-# documents.
-#default_role = None
-
-# If true, '()' will be appended to :func: etc. cross-reference text.
-#add_function_parentheses = True
-
-# If true, the current module name will be prepended to all description
-# unit titles (such as .. function::).
-#add_module_names = True
-
-# If true, sectionauthor and moduleauthor directives will be shown in the
-# output. They are ignored by default.
-#show_authors = False
-
-# The name of the Pygments (syntax highlighting) style to use.
-pygments_style = 'sphinx'
-
-# A list of ignored prefixes for module index sorting.
-#modindex_common_prefix = []
-
-# If true, keep warnings as "system message" paragraphs in the built documents.
-#keep_warnings = False
-
-# If true, `todo` and `todoList` produce output, else they produce nothing.
-todo_include_todos = False
-
-
-# -- Options for HTML output ----------------------------------------------
-
-# The theme to use for HTML and HTML Help pages. See the documentation for
-# a list of builtin themes.
-html_theme = 'sphinx_rtd_theme'
-
-# Theme options are theme-specific and customize the look and feel of a theme
-# further. For a list of options available for each theme, see the
-# documentation.
-#html_theme_options = {}
-
-# Add any paths that contain custom themes here, relative to this directory.
-#html_theme_path = []
-
-# The name for this set of Sphinx documents.
-# " v documentation" by default.
-#html_title = 'google-images-download v1.0.0'
-
-# A shorter title for the navigation bar. Default is the same as html_title.
-#html_short_title = None
-
-# The name of an image file (relative to this directory) to place at the top
-# of the sidebar.
-#html_logo = None
-
-# The name of an image file (relative to this directory) to use as a favicon of
-# the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
-# pixels large.
-#html_favicon = None
-
-# Add any paths that contain custom static files (such as style sheets) here,
-# relative to this directory. They are copied after the builtin static files,
-# so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['_static']
-
-# Add any extra paths that contain custom files (such as robots.txt or
-# .htaccess) here, relative to this directory. These files are copied
-# directly to the root of the documentation.
-#html_extra_path = []
-
-# If not None, a 'Last updated on:' timestamp is inserted at every page
-# bottom, using the given strftime format.
-# The empty string is equivalent to '%b %d, %Y'.
-#html_last_updated_fmt = None
-
-# If true, SmartyPants will be used to convert quotes and dashes to
-# typographically correct entities.
-#html_use_smartypants = True
-
-# Custom sidebar templates, maps document names to template names.
-#html_sidebars = {}
-
-# Additional templates that should be rendered to pages, maps page names to
-# template names.
-#html_additional_pages = {}
-
-# If false, no module index is generated.
-#html_domain_indices = True
-
-# If false, no index is generated.
-#html_use_index = True
-
-# If true, the index is split into individual pages for each letter.
-#html_split_index = False
-
-# If true, links to the reST sources are added to the pages.
-#html_show_sourcelink = True
-
-# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
-#html_show_sphinx = True
-
-# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
-#html_show_copyright = True
-
-# If true, an OpenSearch description file will be output, and all pages will
-# contain a tag referring to it. The value of this option must be the
-# base URL from which the finished HTML is served.
-#html_use_opensearch = ''
-
-# This is the file name suffix for HTML files (e.g. ".xhtml").
-#html_file_suffix = None
-
-# Language to be used for generating the HTML full-text search index.
-# Sphinx supports the following languages:
-# 'da', 'de', 'en', 'es', 'fi', 'fr', 'h', 'it', 'ja'
-# 'nl', 'no', 'pt', 'ro', 'r', 'sv', 'tr', 'zh'
-#html_search_language = 'en'
-
-# A dictionary with options for the search language support, empty by default.
-# 'ja' uses this config value.
-# 'zh' user can custom change `jieba` dictionary path.
-#html_search_options = {'type': 'default'}
-
-# The name of a javascript file (relative to the configuration directory) that
-# implements a search results scorer. If empty, the default will be used.
-#html_search_scorer = 'scorer.js'
-
-# Output file base name for HTML help builder.
-htmlhelp_basename = 'google-images-downloaddoc'
-
-# -- Options for LaTeX output ---------------------------------------------
-
-latex_elements = {
-# The paper size ('letterpaper' or 'a4paper').
-#'papersize': 'letterpaper',
-
-# The font size ('10pt', '11pt' or '12pt').
-#'pointsize': '10pt',
-
-# Additional stuff for the LaTeX preamble.
-#'preamble': '',
-
-# Latex figure (float) alignment
-#'figure_align': 'htbp',
-}
-
-# Grouping the document tree into LaTeX files. List of tuples
-# (source start file, target name, title,
-# author, documentclass [howto, manual, or own class]).
-latex_documents = [
- (master_doc, 'google-images-download.tex', 'google-images-download Documentation',
- 'Hardik Vasa', 'manual'),
-]
-
-# The name of an image file (relative to this directory) to place at the top of
-# the title page.
-#latex_logo = None
-
-# For "manual" documents, if this is true, then toplevel headings are parts,
-# not chapters.
-#latex_use_parts = False
-
-# If true, show page references after internal links.
-#latex_show_pagerefs = False
-
-# If true, show URL addresses after external links.
-#latex_show_urls = False
-
-# Documents to append as an appendix to all manuals.
-#latex_appendices = []
-
-# If false, no module index is generated.
-#latex_domain_indices = True
-
-
-# -- Options for manual page output ---------------------------------------
-
-# One entry per manual page. List of tuples
-# (source start file, name, description, authors, manual section).
-man_pages = [
- (master_doc, 'google-images-download', 'google-images-download Documentation',
- [author], 1)
-]
-
-# If true, show URL addresses after external links.
-#man_show_urls = False
-
-
-# -- Options for Texinfo output -------------------------------------------
-
-# Grouping the document tree into Texinfo files. List of tuples
-# (source start file, target name, title, author,
-# dir menu entry, description, category)
-texinfo_documents = [
- (master_doc, 'google-images-download', 'google-images-download Documentation',
- author, 'google-images-download', 'One line description of project.',
- 'Miscellaneous'),
-]
-
-# Documents to append as an appendix to all manuals.
-#texinfo_appendices = []
-
-# If false, no module index is generated.
-#texinfo_domain_indices = True
-
-# How to display URL addresses: 'footnote', 'no', or 'inline'.
-#texinfo_show_urls = 'footnote'
-
-# If true, do not generate a @detailmenu in the "Top" node's menu.
-#texinfo_no_detailmenu = False
diff --git a/docs/source/index.rst b/docs/source/index.rst
deleted file mode 100644
index 4cffb875..00000000
--- a/docs/source/index.rst
+++ /dev/null
@@ -1,17 +0,0 @@
-Welcome to google-images-download's documentation!
-=========================================
-
-Contents:
-
-.. toctree::
- :maxdepth: 2
-
-
-
-Indices and tables
-==================
-
-* :ref:`genindex`
-* :ref:`modindex`
-* :ref:`search`
-
diff --git a/docs/structure.rst b/docs/structure.rst
new file mode 100644
index 00000000..8e142fde
--- /dev/null
+++ b/docs/structure.rst
@@ -0,0 +1,12 @@
+========
+Workflow
+========
+
+Link to `GitHub repo <https://github.com/hardikvasa/google-images-download>`__
+
+Link to `Documentation Homepage <https://google-images-download.readthedocs.io/en/latest/>`__
+
+Below diagram represents the algorithm logic to download images.
+
+.. figure:: http://www.zseries.in/flow-chart.png
+ :alt:
\ No newline at end of file
diff --git a/docs/troubleshooting.rst b/docs/troubleshooting.rst
new file mode 100644
index 00000000..05e0aee6
--- /dev/null
+++ b/docs/troubleshooting.rst
@@ -0,0 +1,94 @@
+=============================
+Troubleshooting Errors/Issues
+=============================
+
+Link to `GitHub repo <https://github.com/hardikvasa/google-images-download>`__
+
+Link to `Documentation Homepage <https://google-images-download.readthedocs.io/en/latest/>`__
+
+SSL Errors
+==========
+
+If you do see SSL errors on Mac for Python 3,
+please go to Finder —> Applications —> Python 3 —> Click on the ‘Install Certificates.command’
+and run the file.
+
+googleimagesdownload: command not found
+=======================================
+
+While using the above commands, if you get ``Error: -bash: googleimagesdownload: command not found`` then you have to set the correct path variable.
+
+To get the details of the repo, run the following command:
+
+.. code-block:: bash
+
+ $ pip show -f google_images_download
+
+you will get the result like this:
+
+.. code-block:: bash
+
+ Location: /Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages
+ Files:
+ ../../../bin/googleimagesdownload
+
+together they make: ``/Library/Frameworks/Python.framework/Versions/2.7/bin``, which you need to add to the path:
+
+.. code-block:: bash
+
+ $ export PATH="/Library/Frameworks/Python.framework/Versions/2.7/bin"
+
+
+[Errno 13] Permission denied creating directory 'downloads'
+===========================================================
+
+When you run the command, it downloads the images in the current directory (the directory from where you are running the command). If you get permission denied error for creating the `downloads directory`, then move to a directory in which you have the write permission and then run the command again.
+
+
+Permission denied while installing the library
+==============================================
+
+On Mac and Linux, if you get a permission denied error when installing the library using pip, try doing a user install.
+
+.. code-block:: bash
+
+ $ pip install google_images_download --user
+
+You can also run pip install as a superuser with ``sudo pip install google_images_download`` but it is not generally a good idea because it can cause issues with your system-level packages.
+
+
+Installing the chromedriver (with Selenium)
+===========================================
+
+If you want to download more than 100 images per keyword, you will need to install the 'selenium' library along with the 'chromedriver' extension.
+
+If you have pip-installed the library or have run the setup.py file, Selenium would have been installed automatically on your machine. You will also need the Chrome browser on your machine. For chromedriver:
+
+`Download the correct chromedriver <https://chromedriver.chromium.org/downloads>`__ based on your operating system.
+
+On **Windows** or **MAC** if for some reason the chromedriver gives you trouble, download it under the current directory and run the command.
+
+On windows however, the path to chromedriver has to be given in the following format:
+
+``C:\\complete\\path\\to\\chromedriver.exe``
+
+On **Linux** if you are having issues installing google chrome browser, refer to this `CentOS or Amazon Linux Guide `__
+or `Ubuntu Guide `__
+
+For **all operating systems** you will have to use the '--chromedriver' or '-cd' argument to specify the path of the
+chromedriver that you have downloaded on your machine.
+
+If on any rare occasion the chromedriver does not work for you, try downgrading it to a lower version.
+
+
+urlopen error [SSL: CERTIFICATE_VERIFY_FAILED]
+==============================================
+
+`Reference to this issue `__
+
+Use the below command to install the SSL certificate on your machine.
+
+.. code-block:: bash
+
+ cd /Applications/Python\ 3.7/
+ ./Install\ Certificates.command
diff --git a/docs/usage.rst b/docs/usage.rst
new file mode 100644
index 00000000..a190167b
--- /dev/null
+++ b/docs/usage.rst
@@ -0,0 +1,37 @@
+=====
+Usage
+=====
+
+Link to `GitHub repo <https://github.com/hardikvasa/google-images-download>`__
+
+Link to `Documentation Homepage <https://google-images-download.readthedocs.io/en/latest/>`__
+
+Using the library from Command Line Interface
+=============================================
+
+If installed via pip or using CLI, use the following command:
+
+.. code-block:: bash
+
+ $ googleimagesdownload [Arguments...]
+
+If downloaded via the UI, unzip the downloaded file, go to the 'google_images_download' directory and use one of the below commands:
+
+.. code-block:: bash
+
+ $ python3 google_images_download.py [Arguments...]
+ OR
+ $ python google_images_download.py [Arguments...]
+
+
+Using the library from another python file
+==========================================
+
+If you want to use this library from another Python file, you can use it as shown below:
+
+.. code-block:: python
+
+ from google_images_download import google_images_download
+
+ response = google_images_download.googleimagesdownload()
+ absolute_image_paths = response.download({})
\ No newline at end of file
diff --git a/google_images_download/__init__.py b/google_images_download/__init__.py
index ca2c57fb..2d0a5746 100644
--- a/google_images_download/__init__.py
+++ b/google_images_download/__init__.py
@@ -1,5 +1,9 @@
+#!/usr/bin/env python
+from __future__ import absolute_import
+
+
def main():
import google_images_download.google_images_download
if __name__ == '__main__':
- main()
\ No newline at end of file
+ main()
diff --git a/google_images_download/__main__.py b/google_images_download/__main__.py
new file mode 100644
index 00000000..c82f672d
--- /dev/null
+++ b/google_images_download/__main__.py
@@ -0,0 +1,7 @@
+#!/usr/bin/env python
+from __future__ import absolute_import
+
+from .__init__ import main
+
+if __name__ == '__main__':
+ main()
diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py
old mode 100644
new mode 100755
index bee2dd4a..fd89a3a9
--- a/google_images_download/google_images_download.py
+++ b/google_images_download/google_images_download.py
@@ -1,3 +1,4 @@
+#!/usr/bin/env python
# In[ ]:
# coding: utf-8
@@ -12,406 +13,999 @@
from urllib.request import Request, urlopen
from urllib.request import URLError, HTTPError
from urllib.parse import quote
+ import http.client
+ from http.client import IncompleteRead, BadStatusLine
+ http.client._MAXHEADERS = 1000
else: # If the Current Version of Python is 2.x
import urllib2
from urllib2 import Request, urlopen
from urllib2 import URLError, HTTPError
from urllib import quote
+ import httplib
+ from httplib import IncompleteRead, BadStatusLine
+ httplib._MAXHEADERS = 1000
import time # Importing the time library to check the time of code execution
import os
import argparse
import ssl
import datetime
+import json
+import re
+import codecs
+import socket
+
+args_list = ["keywords", "keywords_from_file", "prefix_keywords", "suffix_keywords",
+ "limit", "format", "color", "color_type", "usage_rights", "size",
+ "exact_size", "aspect_ratio", "type", "time", "time_range", "delay", "url", "single_image",
+ "output_directory", "image_directory", "no_directory", "proxy", "similar_images", "specific_site",
+ "print_urls", "print_size", "print_paths", "metadata", "extract_metadata", "socket_timeout",
+ "thumbnail", "thumbnail_only", "language", "prefix", "chromedriver", "related_images", "safe_search", "no_numbering",
+ "offset", "no_download","save_source","silent_mode","ignore_urls"]
+
+
+def user_input():
+ config = argparse.ArgumentParser()
+ config.add_argument('-cf', '--config_file', help='config file name', default='', type=str, required=False)
+ config_file_check = config.parse_known_args()
+ object_check = vars(config_file_check[0])
+
+ if object_check['config_file'] != '':
+ records = []
+ json_file = json.load(open(config_file_check[0].config_file))
+ for record in range(0,len(json_file['Records'])):
+ arguments = {}
+ for i in args_list:
+ arguments[i] = None
+ for key, value in json_file['Records'][record].items():
+ arguments[key] = value
+ records.append(arguments)
+ records_count = len(records)
+ else:
+ # Taking command line arguments from users
+ parser = argparse.ArgumentParser()
+ parser.add_argument('-k', '--keywords', help='delimited list input', type=str, required=False)
+ parser.add_argument('-kf', '--keywords_from_file', help='extract list of keywords from a text file', type=str, required=False)
+ parser.add_argument('-sk', '--suffix_keywords', help='comma separated additional words added after to main keyword', type=str, required=False)
+ parser.add_argument('-pk', '--prefix_keywords', help='comma separated additional words added before main keyword', type=str, required=False)
+ parser.add_argument('-l', '--limit', help='delimited list input', type=str, required=False)
+ parser.add_argument('-f', '--format', help='download images with specific format', type=str, required=False,
+ choices=['jpg', 'gif', 'png', 'bmp', 'svg', 'webp', 'ico'])
+ parser.add_argument('-u', '--url', help='search with google image URL', type=str, required=False)
+ parser.add_argument('-x', '--single_image', help='downloading a single image from URL', type=str, required=False)
+ parser.add_argument('-o', '--output_directory', help='download images in a specific main directory', type=str, required=False)
+ parser.add_argument('-i', '--image_directory', help='download images in a specific sub-directory', type=str, required=False)
+ parser.add_argument('-n', '--no_directory', default=False, help='download images in the main directory but no sub-directory', action="store_true")
+ parser.add_argument('-d', '--delay', help='delay in seconds to wait between downloading two images', type=int, required=False)
+ parser.add_argument('-co', '--color', help='filter on color', type=str, required=False,
+ choices=['red', 'orange', 'yellow', 'green', 'teal', 'blue', 'purple', 'pink', 'white', 'gray', 'black', 'brown'])
+ parser.add_argument('-ct', '--color_type', help='filter on color', type=str, required=False,
+ choices=['full-color', 'black-and-white', 'transparent'])
+ parser.add_argument('-r', '--usage_rights', help='usage rights', type=str, required=False,
+ choices=['labeled-for-reuse-with-modifications','labeled-for-reuse','labeled-for-noncommercial-reuse-with-modification','labeled-for-nocommercial-reuse'])
+ parser.add_argument('-s', '--size', help='image size', type=str, required=False,
+ choices=['large','medium','icon','>400*300','>640*480','>800*600','>1024*768','>2MP','>4MP','>6MP','>8MP','>10MP','>12MP','>15MP','>20MP','>40MP','>70MP'])
+ parser.add_argument('-es', '--exact_size', help='exact image resolution "WIDTH,HEIGHT"', type=str, required=False)
+ parser.add_argument('-t', '--type', help='image type', type=str, required=False,
+ choices=['face','photo','clipart','line-drawing','animated'])
+ parser.add_argument('-w', '--time', help='image age', type=str, required=False,
+ choices=['past-24-hours','past-7-days','past-month','past-year'])
+ parser.add_argument('-wr', '--time_range', help='time range for the age of the image. should be in the format {"time_min":"MM/DD/YYYY","time_max":"MM/DD/YYYY"}', type=str, required=False)
+ parser.add_argument('-a', '--aspect_ratio', help='comma separated additional words added to keywords', type=str, required=False,
+ choices=['tall', 'square', 'wide', 'panoramic'])
+ parser.add_argument('-si', '--similar_images', help='downloads images very similar to the image URL you provide', type=str, required=False)
+ parser.add_argument('-ss', '--specific_site', help='downloads images that are indexed from a specific website', type=str, required=False)
+ parser.add_argument('-p', '--print_urls', default=False, help="Print the URLs of the images", action="store_true")
+ parser.add_argument('-ps', '--print_size', default=False, help="Print the size of the images on disk", action="store_true")
+ parser.add_argument('-pp', '--print_paths', default=False, help="Prints the list of absolute paths of the images",action="store_true")
+ parser.add_argument('-m', '--metadata', default=False, help="Print the metadata of the image", action="store_true")
+ parser.add_argument('-e', '--extract_metadata', default=False, help="Dumps all the logs into a text file", action="store_true")
+ parser.add_argument('-st', '--socket_timeout', default=False, help="Connection timeout waiting for the image to download", type=float)
+ parser.add_argument('-th', '--thumbnail', default=False, help="Downloads image thumbnail along with the actual image", action="store_true")
+ parser.add_argument('-tho', '--thumbnail_only', default=False, help="Downloads only thumbnail without downloading actual images", action="store_true")
+ parser.add_argument('-la', '--language', default=False, help="Defines the language filter. The search results are authomatically returned in that language", type=str, required=False,
+ choices=['Arabic','Chinese (Simplified)','Chinese (Traditional)','Czech','Danish','Dutch','English','Estonian','Finnish','French','German','Greek','Hebrew','Hungarian','Icelandic','Italian','Japanese','Korean','Latvian','Lithuanian','Norwegian','Portuguese','Polish','Romanian','Russian','Spanish','Swedish','Turkish'])
+ parser.add_argument('-pr', '--prefix', default=False, help="A word that you would want to prefix in front of each image name", type=str, required=False)
+ parser.add_argument('-px', '--proxy', help='specify a proxy address and port', type=str, required=False)
+ parser.add_argument('-cd', '--chromedriver', help='specify the path to chromedriver executable in your local machine', type=str, required=False)
+ parser.add_argument('-ri', '--related_images', default=False, help="Downloads images that are similar to the keyword provided", action="store_true")
+ parser.add_argument('-sa', '--safe_search', default=False, help="Turns on the safe search filter while searching for images", action="store_true")
+ parser.add_argument('-nn', '--no_numbering', default=False, help="Allows you to exclude the default numbering of images", action="store_true")
+ parser.add_argument('-of', '--offset', help="Where to start in the fetched links", type=str, required=False)
+ parser.add_argument('-nd', '--no_download', default=False, help="Prints the URLs of the images and/or thumbnails without downloading them", action="store_true")
+ parser.add_argument('-iu', '--ignore_urls', default=False, help="delimited list input of image urls/keywords to ignore", type=str)
+ parser.add_argument('-sil', '--silent_mode', default=False, help="Remains silent. Does not print notification messages on the terminal", action="store_true")
+ parser.add_argument('-is', '--save_source', help="creates a text file containing a list of downloaded images along with source page url", type=str, required=False)
+
+ args = parser.parse_args()
+ arguments = vars(args)
+ records = []
+ records.append(arguments)
+ return records
+
+
+class googleimagesdownload:
+ def __init__(self):
+ pass
+
+ # Downloading entire Web Document (Raw Page Content)
+ def download_page(self,url):
+ version = (3, 0)
+ cur_version = sys.version_info
+ if cur_version >= version: # If the Current Version of Python is 3.0 or above
+ try:
+ headers = {}
+ headers['User-Agent'] = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36"
+ req = urllib.request.Request(url, headers=headers)
+ resp = urllib.request.urlopen(req)
+ respData = str(resp.read())
+ return respData
+ except Exception as e:
+ print("Could not open URL. Please check your internet connection and/or ssl settings \n"
+ "If you are using proxy, make sure your proxy settings is configured correctly")
+ sys.exit()
+ else: # If the Current Version of Python is 2.x
+ try:
+ headers = {}
+ headers['User-Agent'] = "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17"
+ req = urllib2.Request(url, headers=headers)
+ try:
+ response = urllib2.urlopen(req)
+ except URLError: # Handling SSL certificate failed
+ context = ssl._create_unverified_context()
+ response = urlopen(req, context=context)
+ page = response.read()
+ return page
+ except:
+ print("Could not open URL. Please check your internet connection and/or ssl settings \n"
+ "If you are using proxy, make sure your proxy settings is configured correctly")
+ sys.exit()
+ return "Page Not found"
+
+
+ # Download Page for more than 100 images
+ def download_extended_page(self,url,chromedriver):
+ from selenium import webdriver
+ from selenium.webdriver.common.keys import Keys
+ if sys.version_info[0] < 3:
+ reload(sys)
+ sys.setdefaultencoding('utf8')
+ options = webdriver.ChromeOptions()
+ options.add_argument('--no-sandbox')
+ options.add_argument("--headless")
-# Taking command line arguments from users
-parser = argparse.ArgumentParser()
-parser.add_argument('-k', '--keywords', help='delimited list input', type=str, required=False)
-parser.add_argument('-sk', '--suffix_keywords', help='comma separated additional words added to main keyword', type=str, required=False)
-parser.add_argument('-l', '--limit', help='delimited list input', type=str, required=False)
-parser.add_argument('-f', '--format', help='download images with specific format', type=str, required=False,
- choices=['jpg', 'gif', 'png', 'bmp', 'svg', 'webp', 'ico'])
-parser.add_argument('-u', '--url', help='search with google image URL', type=str, required=False)
-parser.add_argument('-x', '--single_image', help='downloading a single image from URL', type=str, required=False)
-parser.add_argument('-o', '--output_directory', help='download images in a specific directory', type=str, required=False)
-parser.add_argument('-d', '--delay', help='delay in seconds to wait between downloading two images', type=str, required=False)
-parser.add_argument('-c', '--color', help='filter on color', type=str, required=False,
- choices=['red', 'orange', 'yellow', 'green', 'teal', 'blue', 'purple', 'pink', 'white', 'gray', 'black', 'brown'])
-parser.add_argument('-ct', '--color_type', help='filter on color', type=str, required=False,
- choices=['full-color', 'black-and-white', 'transparent'])
-parser.add_argument('-r', '--usage_rights', help='usage rights', type=str, required=False,
- choices=['labled-for-reuse-with-modifications','labled-for-reuse','labled-for-noncommercial-reuse-with-modification','labled-for-nocommercial-reuse'])
-parser.add_argument('-s', '--size', help='image size', type=str, required=False,
- choices=['large','medium','icon'])
-parser.add_argument('-t', '--type', help='image type', type=str, required=False,
- choices=['face','photo','clip-art','line-drawing','animated'])
-parser.add_argument('-w', '--time', help='image age', type=str, required=False,
- choices=['past-24-hours','past-7-days'])
-parser.add_argument('-a', '--aspect_ratio', help='comma separated additional words added to keywords', type=str, required=False,
- choices=['tall', 'square', 'wide', 'panoramic'])
-parser.add_argument('-si', '--similar_images', help='downloads images very similar to the image URL you provide', type=str, required=False)
-parser.add_argument('-ss', '--specific_site', help='downloads images that are indexed from a specific website', type=str, required=False)
-parser.add_argument('-p', '--print_urls', default=False, help="Print the URLs of the images", action="store_true")
-
-args = parser.parse_args()
-
-#Initialization and Validation of user arguments
-if args.keywords:
- search_keyword = [str(item) for item in args.keywords.split(',')]
-
-#Additional words added to keywords
-if args.suffix_keywords:
- suffix_keywords = [" " + str(sk) for sk in args.suffix_keywords.split(',')]
-else:
- suffix_keywords = []
-
-# Setting limit on number of images to be downloaded
-if args.limit:
- limit = int(args.limit)
- if int(args.limit) >= 100:
- limit = 100
-else:
- limit = 100
-
-# If single_image or url argument not present then keywords is mandatory argument
-if args.single_image is None and args.url is None and args.similar_images is None and args.keywords is None:
- parser.error('Keywords is a required argument!')
-
-# If this argument is present, set the custom output directory
-if args.output_directory:
- main_directory = args.output_directory
-else:
- main_directory = "downloads"
-
-# Set the delay parameter if this argument is present
-if args.delay:
- try:
- delay_time = int(args.delay)
- except ValueError:
- parser.error('Delay parameter should be an integer!')
-else:
- delay_time = 0
-
-if args.print_urls:
- print_url = 'yes'
-else:
- print_url = 'no'
-#------ Initialization Complete ------#
-
-# Downloading entire Web Document (Raw Page Content)
-def download_page(url):
- version = (3, 0)
- cur_version = sys.version_info
- if cur_version >= version: # If the Current Version of Python is 3.0 or above
try:
- headers = {}
- headers['User-Agent'] = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36"
- req = urllib.request.Request(url, headers=headers)
- resp = urllib.request.urlopen(req)
- respData = str(resp.read())
- return respData
+ browser = webdriver.Chrome(chromedriver, chrome_options=options)
except Exception as e:
- print(str(e))
- else: # If the Current Version of Python is 2.x
+ print("Looks like we cannot locate the path the 'chromedriver' (use the '--chromedriver' "
+ "argument to specify the path to the executable.) or google chrome browser is not "
+ "installed on your machine (exception: %s)" % e)
+ sys.exit()
+ browser.set_window_size(1024, 768)
+
+ # Open the link
+ browser.get(url)
+ time.sleep(1)
+ print("Getting you a lot of images. This may take a few moments...")
+
+ element = browser.find_element_by_tag_name("body")
+ # Scroll down
+ for i in range(30):
+ element.send_keys(Keys.PAGE_DOWN)
+ time.sleep(0.3)
+
try:
- headers = {}
- headers['User-Agent'] = "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17"
- req = urllib2.Request(url, headers=headers)
- try:
- response = urllib2.urlopen(req)
- except URLError: # Handling SSL certificate failed
- context = ssl._create_unverified_context()
- response = urlopen(req, context=context)
- page = response.read()
- return page
+ browser.find_element_by_id("smb").click()
+ for i in range(50):
+ element.send_keys(Keys.PAGE_DOWN)
+ time.sleep(0.3) # bot id protection
except:
- return "Page Not found"
+ for i in range(10):
+ element.send_keys(Keys.PAGE_DOWN)
+ time.sleep(0.3) # bot id protection
+ print("Reached end of Page.")
+ time.sleep(0.5)
-# Finding 'Next Image' from the given raw page
-def _images_get_next_item(s):
- start_line = s.find('rg_di')
- if start_line == -1: # If no links are found then give an error!
- end_quote = 0
- link = "no_links"
- return link, end_quote
- else:
- start_line = s.find('"class="rg_meta"')
- start_content = s.find('"ou"', start_line + 1)
- end_content = s.find(',"ow"', start_content + 1)
- content_raw = str(s[start_content + 6:end_content - 1])
- return content_raw, end_content
-
-
-# Getting all links with the help of '_images_get_next_image'
-def _images_get_all_items(page):
- items = []
- while True:
- item, end_content = _images_get_next_item(page)
- if item == "no_links":
- break
+ source = browser.page_source #page source
+ #close the browser
+ browser.close()
+
+ return source
+
+
+ #Correcting the escape characters for python2
+ def replace_with_byte(self,match):
+ return chr(int(match.group(0)[1:], 8))
+
+ def repair(self,brokenjson):
+ invalid_escape = re.compile(r'\\[0-7]{1,3}') # up to 3 digits for byte values up to FF
+ return invalid_escape.sub(self.replace_with_byte, brokenjson)
+
+
+ # Finding 'Next Image' from the given raw page
+ def get_next_tab(self,s):
+ start_line = s.find('class="dtviD"')
+ if start_line == -1: # If no links are found then give an error!
+ end_quote = 0
+ link = "no_tabs"
+ return link,'',end_quote
else:
- items.append(item) # Append all the links in the list named 'Links'
- time.sleep(0.1) # Timer could be used to slow down the request for image downloads
- page = page[end_content:]
- return items
+ start_line = s.find('class="dtviD"')
+ start_content = s.find('href="', start_line + 1)
+ end_content = s.find('">', start_content + 1)
+ url_item = "https://www.google.com" + str(s[start_content + 6:end_content])
+ url_item = url_item.replace('&amp;', '&')
+
+ start_line_2 = s.find('class="dtviD"')
+ s = s.replace('&amp;', '&')
+ start_content_2 = s.find(':', start_line_2 + 1)
+ end_content_2 = s.find('&usg=', start_content_2 + 1)
+ url_item_name = str(s[start_content_2 + 1:end_content_2])
+
+ chars = url_item_name.find(',g_1:')
+ chars_end = url_item_name.find(":", chars + 6)
+ if chars_end == -1:
+ updated_item_name = (url_item_name[chars + 5:]).replace("+", " ")
+ else:
+ updated_item_name = (url_item_name[chars+5:chars_end]).replace("+", " ")
+ return url_item, updated_item_name, end_content
-def similar_images():
- version = (3, 0)
- cur_version = sys.version_info
- if cur_version >= version: # If the Current Version of Python is 3.0 or above
+
+ # Getting all tabs with the help of 'get_next_tab'
+ def get_all_tabs(self,page):
+ tabs = {}
+ while True:
+ item,item_name,end_content = self.get_next_tab(page)
+ if item == "no_tabs":
+ break
+ else:
+ if len(item_name) > 100 or item_name == "background-color":
+ break
+ else:
+ tabs[item_name] = item # Append all the links in the list named 'Links'
+ time.sleep(0.1) # Timer could be used to slow down the request for image downloads
+ page = page[end_content:]
+ return tabs
+
+
+ #Format the object in readable format
+ def format_object(self,object):
+ formatted_object = {}
+ formatted_object['image_format'] = object['ity']
+ formatted_object['image_height'] = object['oh']
+ formatted_object['image_width'] = object['ow']
+ formatted_object['image_link'] = object['ou']
+ formatted_object['image_description'] = object['pt']
+ formatted_object['image_host'] = object['rh']
+ formatted_object['image_source'] = object['ru']
+ formatted_object['image_thumbnail_url'] = object['tu']
+ return formatted_object
+
+
+ #function to download single image
+ def single_image(self,image_url):
+ main_directory = "downloads"
+ extensions = (".jpg", ".gif", ".png", ".bmp", ".svg", ".webp", ".ico")
+ url = image_url
try:
- searchUrl = 'https://www.google.com/searchbyimage?site=search&sa=X&image_url=' + args.similar_images
- headers = {}
- headers['User-Agent'] = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36"
-
- req1 = urllib.request.Request(searchUrl, headers=headers)
- resp1 = urllib.request.urlopen(req1)
- content = str(resp1.read())
- l1 = content.find('AMhZZ')
- l2 = content.find('&', l1)
- urll = content[l1:l2]
-
- newurl = "https://www.google.com/search?tbs=sbi:" + urll + "&site=search&sa=X"
- req2 = urllib.request.Request(newurl, headers=headers)
- resp2 = urllib.request.urlopen(req2)
- # print(resp2.read())
- l3 = content.find('/search?sa=X&q=')
- l4 = content.find(';', l3 + 19)
- urll2 = content[l3 + 19:l4]
- return urll2
- except:
- return "Cloud not connect to Google Imagees endpoint"
- else: # If the Current Version of Python is 2.x
+ os.makedirs(main_directory)
+ except OSError as e:
+ if e.errno != 17:
+ raise
+ pass
+ req = Request(url, headers={
+ "User-Agent": "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17"})
+
+ response = urlopen(req, None, 10)
+ data = response.read()
+ response.close()
+
+ image_name = str(url[(url.rfind('/')) + 1:])
+ if '?' in image_name:
+ image_name = image_name[:image_name.find('?')]
+ # if ".jpg" in image_name or ".gif" in image_name or ".png" in image_name or ".bmp" in image_name or ".svg" in image_name or ".webp" in image_name or ".ico" in image_name:
+ if any(map(lambda extension: extension in image_name, extensions)):
+ file_name = main_directory + "/" + image_name
+ else:
+ file_name = main_directory + "/" + image_name + ".jpg"
+ image_name = image_name + ".jpg"
+
try:
- searchUrl = 'https://www.google.com/searchbyimage?site=search&sa=X&image_url=' + args.similar_images
- headers = {}
- headers['User-Agent'] = "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17"
-
- req1 = urllib2.Request(searchUrl, headers=headers)
- resp1 = urllib2.urlopen(req1)
- content = str(resp1.read())
- l1 = content.find('AMhZZ')
- l2 = content.find('&', l1)
- urll = content[l1:l2]
-
- newurl = "https://www.google.com/search?tbs=sbi:" + urll + "&site=search&sa=X"
- #print newurl
- req2 = urllib2.Request(newurl, headers=headers)
- resp2 = urllib2.urlopen(req2)
- # print(resp2.read())
- l3 = content.find('/search?sa=X&q=')
- l4 = content.find(';', l3 + 19)
- urll2 = content[l3 + 19:l4]
- return(urll2)
- except:
- return "Cloud not connect to Google Imagees endpoint"
-
-#Building URL parameters
-def build_url_parameters():
- built_url = "&tbs="
- counter = 0
- params = {'color':[args.color,{'red':'ic:specific,isc:red', 'orange':'ic:specific,isc:orange', 'yellow':'ic:specific,isc:yellow', 'green':'ic:specific,isc:green', 'teal':'ic:specific,isc:teel', 'blue':'ic:specific,isc:blue', 'purple':'ic:specific,isc:purple', 'pink':'ic:specific,isc:pink', 'white':'ic:specific,isc:white', 'gray':'ic:specific,isc:gray', 'black':'ic:specific,isc:black', 'brown':'ic:specific,isc:brown'}],
- 'color_type':[args.color_type,{'full-color':'ic:color', 'black-and-white':'ic:gray','transparent':'ic:trans'}],
- 'usage_rights':[args.usage_rights,{'labled-for-reuse-with-modifications':'sur:fmc','labled-for-reuse':'sur:fc','labled-for-noncommercial-reuse-with-modification':'sur:fm','labled-for-nocommercial-reuse':'sur:f'}],
- 'size':[args.size,{'large':'isz:l','medium':'isz:m','icon':'isz:i'}],
- 'type':[args.type,{'face':'itp:face','photo':'itp:photo','clip-art':'itp:clip-art','line-drawing':'itp:lineart','animated':'itp:animated'}],
- 'time':[args.time,{'past-24-hours':'qdr:d','past-7-days':'qdr:w'}],
- 'aspect_ratio':[args.aspect_ratio,{'tall':'iar:t','square':'iar:s','wide':'iar:w','panoramic':'iar:xw'}],
- 'format':[args.format,{'jpg':'ift:jpg','gif':'ift:gif','png':'ift:png','bmp':'ift:bmp','svg':'ift:svg','webp':'webp','ico':'ift:ico'}]}
- for key, value in params.items():
- if value[0] is not None:
- ext_param = value[1][value[0]]
- # counter will tell if it is first param added or not
- if counter == 0:
- # add it to the built url
- built_url = built_url + ext_param
- counter += 1
- else:
- built_url = built_url + ',' + ext_param
- counter += 1
- return built_url
-
-#function to download single image
-def single_image():
- url = args.single_image
- try:
- os.makedirs(main_directory)
- except OSError as e:
- if e.errno != 17:
- raise
- # time.sleep might help here
- pass
- req = Request(url, headers={
- "User-Agent": "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17"})
- response = urlopen(req, None, 10)
- image_name = str(url[(url.rfind('/')) + 1:])
- if '?' in image_name:
- image_name = image_name[:image_name.find('?')]
- if ".jpg" in image_name or ".gif" in image_name or ".png" in image_name or ".bmp" in image_name or ".svg" in image_name or ".webp" in image_name or ".ico" in image_name:
- output_file = open(main_directory + "/" + image_name, 'wb')
- else:
- output_file = open(main_directory + "/" + image_name + ".jpg", 'wb')
- image_name = image_name + ".jpg"
+ output_file = open(file_name, 'wb')
+ output_file.write(data)
+ output_file.close()
+ except IOError as e:
+ raise e
+ except OSError as e:
+ raise e
+ print("completed ====> " + image_name.encode('raw_unicode_escape').decode('utf-8'))
+ return
+
+ def similar_images(self,similar_images):
+ version = (3, 0)
+ cur_version = sys.version_info
+ if cur_version >= version: # If the Current Version of Python is 3.0 or above
+ try:
+ searchUrl = 'https://www.google.com/searchbyimage?site=search&sa=X&image_url=' + similar_images
+ headers = {}
+ headers['User-Agent'] = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36"
+
+ req1 = urllib.request.Request(searchUrl, headers=headers)
+ resp1 = urllib.request.urlopen(req1)
+ content = str(resp1.read())
+ l1 = content.find('AMhZZ')
+ l2 = content.find('&', l1)
+ urll = content[l1:l2]
+
+ newurl = "https://www.google.com/search?tbs=sbi:" + urll + "&site=search&sa=X"
+ req2 = urllib.request.Request(newurl, headers=headers)
+ resp2 = urllib.request.urlopen(req2)
+ l3 = content.find('/search?sa=X&q=')
+ l4 = content.find(';', l3 + 19)
+ urll2 = content[l3 + 19:l4]
+ return urll2
+ except:
+ return "Could not connect to Google Images endpoint"
+ else: # If the Current Version of Python is 2.x
+ try:
+ searchUrl = 'https://www.google.com/searchbyimage?site=search&sa=X&image_url=' + similar_images
+ headers = {}
+ headers['User-Agent'] = "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17"
+
+ req1 = urllib2.Request(searchUrl, headers=headers)
+ resp1 = urllib2.urlopen(req1)
+ content = str(resp1.read())
+ l1 = content.find('AMhZZ')
+ l2 = content.find('&', l1)
+ urll = content[l1:l2]
+
+ newurl = "https://www.google.com/search?tbs=sbi:" + urll + "&site=search&sa=X"
+ req2 = urllib2.Request(newurl, headers=headers)
+ resp2 = urllib2.urlopen(req2)
+ l3 = content.find('/search?sa=X&q=')
+ l4 = content.find(';', l3 + 19)
+ urll2 = content[l3 + 19:l4]
+ return(urll2)
+ except:
+ return "Could not connect to Google Images endpoint"
+
+ #Building URL parameters
+ def build_url_parameters(self,arguments):
+ if arguments['language']:
+ lang = "&lr="
+ lang_param = {"Arabic":"lang_ar","Chinese (Simplified)":"lang_zh-CN","Chinese (Traditional)":"lang_zh-TW","Czech":"lang_cs","Danish":"lang_da","Dutch":"lang_nl","English":"lang_en","Estonian":"lang_et","Finnish":"lang_fi","French":"lang_fr","German":"lang_de","Greek":"lang_el","Hebrew":"lang_iw ","Hungarian":"lang_hu","Icelandic":"lang_is","Italian":"lang_it","Japanese":"lang_ja","Korean":"lang_ko","Latvian":"lang_lv","Lithuanian":"lang_lt","Norwegian":"lang_no","Portuguese":"lang_pt","Polish":"lang_pl","Romanian":"lang_ro","Russian":"lang_ru","Spanish":"lang_es","Swedish":"lang_sv","Turkish":"lang_tr"}
+ lang_url = lang+lang_param[arguments['language']]
+ else:
+ lang_url = ''
- data = response.read()
- output_file.write(data)
- response.close()
- print("completed ====> " + image_name)
- return
+ if arguments['time_range']:
+ json_acceptable_string = arguments['time_range'].replace("'", "\"")
+ d = json.loads(json_acceptable_string)
+ time_range = ',cdr:1,cd_min:' + d['time_min'] + ',cd_max:' + d['time_max']
+ else:
+ time_range = ''
+ if arguments['exact_size']:
+ size_array = [x.strip() for x in arguments['exact_size'].split(',')]
+ exact_size = ",isz:ex,iszw:" + str(size_array[0]) + ",iszh:" + str(size_array[1])
+ else:
+ exact_size = ''
+
+ built_url = "&tbs="
+ counter = 0
+ params = {'color':[arguments['color'],{'red':'ic:specific,isc:red', 'orange':'ic:specific,isc:orange', 'yellow':'ic:specific,isc:yellow', 'green':'ic:specific,isc:green', 'teal':'ic:specific,isc:teel', 'blue':'ic:specific,isc:blue', 'purple':'ic:specific,isc:purple', 'pink':'ic:specific,isc:pink', 'white':'ic:specific,isc:white', 'gray':'ic:specific,isc:gray', 'black':'ic:specific,isc:black', 'brown':'ic:specific,isc:brown'}],
+ 'color_type':[arguments['color_type'],{'full-color':'ic:color', 'black-and-white':'ic:gray','transparent':'ic:trans'}],
+ 'usage_rights':[arguments['usage_rights'],{'labeled-for-reuse-with-modifications':'sur:fmc','labeled-for-reuse':'sur:fc','labeled-for-noncommercial-reuse-with-modification':'sur:fm','labeled-for-nocommercial-reuse':'sur:f'}],
+ 'size':[arguments['size'],{'large':'isz:l','medium':'isz:m','icon':'isz:i','>400*300':'isz:lt,islt:qsvga','>640*480':'isz:lt,islt:vga','>800*600':'isz:lt,islt:svga','>1024*768':'isz:lt,islt:xga','>2MP':'isz:lt,islt:2mp','>4MP':'isz:lt,islt:4mp','>6MP':'isz:lt,islt:6mp','>8MP':'isz:lt,islt:8mp','>10MP':'isz:lt,islt:10mp','>12MP':'isz:lt,islt:12mp','>15MP':'isz:lt,islt:15mp','>20MP':'isz:lt,islt:20mp','>40MP':'isz:lt,islt:40mp','>70MP':'isz:lt,islt:70mp'}],
+ 'type':[arguments['type'],{'face':'itp:face','photo':'itp:photo','clipart':'itp:clipart','line-drawing':'itp:lineart','animated':'itp:animated'}],
+ 'time':[arguments['time'],{'past-24-hours':'qdr:d','past-7-days':'qdr:w','past-month':'qdr:m','past-year':'qdr:y'}],
+ 'aspect_ratio':[arguments['aspect_ratio'],{'tall':'iar:t','square':'iar:s','wide':'iar:w','panoramic':'iar:xw'}],
+ 'format':[arguments['format'],{'jpg':'ift:jpg','gif':'ift:gif','png':'ift:png','bmp':'ift:bmp','svg':'ift:svg','webp':'webp','ico':'ift:ico','raw':'ift:craw'}]}
+ for key, value in params.items():
+ if value[0] is not None:
+ ext_param = value[1][value[0]]
+ # counter will tell if it is first param added or not
+ if counter == 0:
+ # add it to the built url
+ built_url = built_url + ext_param
+ counter += 1
+ else:
+ built_url = built_url + ',' + ext_param
+ counter += 1
+ built_url = lang_url+built_url+exact_size+time_range
+ return built_url
+
+
+ #building main search URL
+ def build_search_url(self,search_term,params,url,similar_images,specific_site,safe_search):
+ #check safe_search
+ safe_search_string = "&safe=active"
+ # check the args and choose the URL
+ if url:
+ url = url
+ elif similar_images:
+ print(similar_images)
+ keywordem = self.similar_images(similar_images)
+ url = 'https://www.google.com/search?q=' + keywordem + '&espv=2&biw=1366&bih=667&site=webhp&source=lnms&tbm=isch&sa=X&ei=XosDVaCXD8TasATItgE&ved=0CAcQ_AUoAg'
+ elif specific_site:
+ url = 'https://www.google.com/search?q=' + quote(
+ search_term.encode('utf-8')) + '&as_sitesearch=' + specific_site + '&espv=2&biw=1366&bih=667&site=webhp&source=lnms&tbm=isch' + params + '&sa=X&ei=XosDVaCXD8TasATItgE&ved=0CAcQ_AUoAg'
+ else:
+ url = 'https://www.google.com/search?q=' + quote(
+ search_term.encode('utf-8')) + '&espv=2&biw=1366&bih=667&site=webhp&source=lnms&tbm=isch' + params + '&sa=X&ei=XosDVaCXD8TasATItgE&ved=0CAcQ_AUoAg'
+
+ #safe search check
+ if safe_search:
+ url = url + safe_search_string
+
+ return url
+
+
+ #measures the file size
+ def file_size(self,file_path):
+ if os.path.isfile(file_path):
+ file_info = os.stat(file_path)
+ size = file_info.st_size
+ for x in ['bytes', 'KB', 'MB', 'GB', 'TB']:
+ if size < 1024.0:
+ return "%3.1f %s" % (size, x)
+ size /= 1024.0
+ return size
+
+ #keywords from file
+ def keywords_from_file(self,file_name):
+ search_keyword = []
+ with codecs.open(file_name, 'r', encoding='utf-8-sig') as f:
+ if '.csv' in file_name:
+ for line in f:
+ if line in ['\n', '\r\n']:
+ pass
+ else:
+ search_keyword.append(line.replace('\n', '').replace('\r', ''))
+ elif '.txt' in file_name:
+ for line in f:
+ if line in ['\n', '\r\n']:
+ pass
+ else:
+ search_keyword.append(line.replace('\n', '').replace('\r', ''))
+ else:
+ print("Invalid file type: Valid file types are either .txt or .csv \n"
+ "exiting...")
+ sys.exit()
+ return search_keyword
+
+ # make directories
+ def create_directories(self,main_directory, dir_name,thumbnail,thumbnail_only):
+ dir_name_thumbnail = dir_name + " - thumbnail"
+ # make a search keyword directory
+ try:
+ if not os.path.exists(main_directory):
+ os.makedirs(main_directory)
+ time.sleep(0.2)
+ path = (dir_name)
+ sub_directory = os.path.join(main_directory, path)
+ if not os.path.exists(sub_directory):
+ os.makedirs(sub_directory)
+ if thumbnail or thumbnail_only:
+ sub_directory_thumbnail = os.path.join(main_directory, dir_name_thumbnail)
+ if not os.path.exists(sub_directory_thumbnail):
+ os.makedirs(sub_directory_thumbnail)
+ else:
+ path = (dir_name)
+ sub_directory = os.path.join(main_directory, path)
+ if not os.path.exists(sub_directory):
+ os.makedirs(sub_directory)
+ if thumbnail or thumbnail_only:
+ sub_directory_thumbnail = os.path.join(main_directory, dir_name_thumbnail)
+ if not os.path.exists(sub_directory_thumbnail):
+ os.makedirs(sub_directory_thumbnail)
+ except OSError as e:
+ if e.errno != 17:
+ raise
+ pass
+ return
+
+
+ # Download Image thumbnails
+ def download_image_thumbnail(self,image_url,main_directory,dir_name,return_image_name,print_urls,socket_timeout,print_size,no_download,save_source,img_src,ignore_urls):
+ if print_urls or no_download:
+ print("Image URL: " + image_url)
+ if no_download:
+ return "success","Printed url without downloading"
+ try:
+ req = Request(image_url, headers={
+ "User-Agent": "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17"})
+ try:
+ # timeout time to download an image
+ if socket_timeout:
+ timeout = float(socket_timeout)
+ else:
+ timeout = 10
-def bulk_download(search_keyword,suffix_keywords,limit,main_directory,delay_time,print_url):
- errorCount = 0
- if args.url:
- search_keyword = [str(datetime.datetime.now()).split('.')[0]]
- if args.similar_images:
- search_keyword = [str(datetime.datetime.now()).split('.')[0]]
+ response = urlopen(req, None, timeout)
+ data = response.read()
+ response.close()
- # appending a dummy value to Suffix Keywords array if it is blank
- if len(suffix_keywords) == 0:
- suffix_keywords.append('')
+ path = main_directory + "/" + dir_name + " - thumbnail" + "/" + return_image_name
- for sky in suffix_keywords:
- i = 0
- while i < len(search_keyword):
- items = []
- iteration = "\n" + "Item no.: " + str(i + 1) + " -->" + " Item name = " + str(search_keyword[i] + str(sky))
- print(iteration)
- print("Evaluating...")
- search_term = search_keyword[i] + sky
- dir_name = search_term + ('-' + args.color if args.color else '')
-
- # make a search keyword directory
+ try:
+ output_file = open(path, 'wb')
+ output_file.write(data)
+ output_file.close()
+ if save_source:
+ list_path = main_directory + "/" + save_source + ".txt"
+ list_file = open(list_path,'a')
+ list_file.write(path + '\t' + img_src + '\n')
+ list_file.close()
+ except OSError as e:
+ download_status = 'fail'
+ download_message = "OSError on an image...trying next one..." + " Error: " + str(e)
+ except IOError as e:
+ download_status = 'fail'
+ download_message = "IOError on an image...trying next one..." + " Error: " + str(e)
+
+ download_status = 'success'
+ download_message = "Completed Image Thumbnail ====> " + return_image_name
+
+ # image size parameter
+ if print_size:
+ print("Image Size: " + str(self.file_size(path)))
+
+ except UnicodeEncodeError as e:
+ download_status = 'fail'
+ download_message = "UnicodeEncodeError on an image...trying next one..." + " Error: " + str(e)
+
+ except HTTPError as e: # If there is any HTTPError
+ download_status = 'fail'
+ download_message = "HTTPError on an image...trying next one..." + " Error: " + str(e)
+
+ except URLError as e:
+ download_status = 'fail'
+ download_message = "URLError on an image...trying next one..." + " Error: " + str(e)
+
+ except ssl.CertificateError as e:
+ download_status = 'fail'
+ download_message = "CertificateError on an image...trying next one..." + " Error: " + str(e)
+
+ except IOError as e: # If there is any IOError
+ download_status = 'fail'
+ download_message = "IOError on an image...trying next one..." + " Error: " + str(e)
+ return download_status, download_message
+
+
+ # Download Images
+ def download_image(self,image_url,image_format,main_directory,dir_name,count,print_urls,socket_timeout,prefix,print_size,no_numbering,no_download,save_source,img_src,silent_mode,thumbnail_only,format,ignore_urls):
+ if not silent_mode:
+ if print_urls or no_download:
+ print("Image URL: " + image_url)
+ if ignore_urls:
+ if any(url in image_url for url in ignore_urls.split(',')):
+ return "fail", "Image ignored due to 'ignore url' parameter", None, image_url
+ if thumbnail_only:
+ return "success", "Skipping image download...", str(image_url[(image_url.rfind('/')) + 1:]), image_url
+ if no_download:
+ return "success","Printed url without downloading",None,image_url
+ try:
+ req = Request(image_url, headers={
+ "User-Agent": "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17"})
try:
- if not os.path.exists(main_directory):
- os.makedirs(main_directory)
- time.sleep(0.2)
- path = str(dir_name)
- sub_directory = os.path.join(main_directory, path)
- if not os.path.exists(sub_directory):
- os.makedirs(sub_directory)
+ # timeout time to download an image
+ if socket_timeout:
+ timeout = float(socket_timeout)
else:
- path = str(dir_name)
- sub_directory = os.path.join(main_directory, path)
- if not os.path.exists(sub_directory):
- os.makedirs(sub_directory)
- except OSError as e:
- if e.errno != 17:
- raise
- # time.sleep might help here
- pass
-
- params = build_url_parameters()
- # color_param = ('&tbs=ic:specific,isc:' + args.color) if args.color else ''
- # check the args and choose the URL
- if args.url:
- url = args.url
- elif args.similar_images:
- keywordem = similar_images()
- url = 'https://www.google.com/search?q=' + keywordem + '&espv=2&biw=1366&bih=667&site=webhp&source=lnms&tbm=isch&sa=X&ei=XosDVaCXD8TasATItgE&ved=0CAcQ_AUoAg'
- elif args.specific_site:
- url = 'https://www.google.com/search?q=' + quote(
- search_term) + 'site:' + args.specific_site + '&espv=2&biw=1366&bih=667&site=webhp&source=lnms&tbm=isch' + params + '&sa=X&ei=XosDVaCXD8TasATItgE&ved=0CAcQ_AUoAg'
- else:
- url = 'https://www.google.com/search?q=' + quote(
- search_term) + '&espv=2&biw=1366&bih=667&site=webhp&source=lnms&tbm=isch' + params + '&sa=X&ei=XosDVaCXD8TasATItgE&ved=0CAcQ_AUoAg'
- raw_html = (download_page(url))
- time.sleep(0.1)
- items = items + (_images_get_all_items(raw_html))
- print("Total Image Links = " + str(len(items)))
-
- #If search does not return anything, do not try to force download
- if len(items) <= 1:
- print('***** This search result did not return any results...please try a different search filter *****')
- break
+ timeout = 10
+
+ response = urlopen(req, None, timeout)
+ data = response.read()
+ response.close()
+
+ extensions = [".jpg", ".jpeg", ".gif", ".png", ".bmp", ".svg", ".webp", ".ico"]
+ # keep everything after the last '/'
+ image_name = str(image_url[(image_url.rfind('/')) + 1:])
+ if format:
+ if not image_format or image_format != format:
+ download_status = 'fail'
+ download_message = "Wrong image format returned. Skipping..."
+ return_image_name = ''
+ absolute_path = ''
+ return download_status, download_message, return_image_name, absolute_path
+
+ if image_format == "" or not image_format or "." + image_format not in extensions:
+ download_status = 'fail'
+ download_message = "Invalid or missing image format. Skipping..."
+ return_image_name = ''
+ absolute_path = ''
+ return download_status, download_message, return_image_name, absolute_path
+ elif image_name.lower().find("." + image_format) < 0:
+ image_name = image_name + "." + image_format
+ else:
+ image_name = image_name[:image_name.lower().find("." + image_format) + (len(image_format) + 1)]
- print("Starting Download...")
+ # prefix name in image
+ if prefix:
+ prefix = prefix + " "
+ else:
+ prefix = ''
+
+ if no_numbering:
+ path = main_directory + "/" + dir_name + "/" + prefix + image_name
+ else:
+ path = main_directory + "/" + dir_name + "/" + prefix + str(count) + "." + image_name
- k = 0
- success_count = 0
- while (k < len(items)): # items ==> URLs
try:
- image_url = items[k]
-
- if print_url == 'yes':
- print("\n" + str(image_url))
-
- req = Request(image_url, headers={
- "User-Agent": "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17"})
- try:
- response = urlopen(req, None, 15)
- image_name = str(items[k][(items[k].rfind('/')) + 1:])
- if '?' in image_name:
- image_name = image_name[:image_name.find('?')]
- if ".jpg" in image_name or ".JPG" in image_name or ".gif" in image_name or ".png" in image_name or ".bmp" in image_name or ".svg" in image_name or ".webp" in image_name or ".ico" in image_name:
- output_file = open(main_directory + "/" + dir_name + "/" + str(success_count + 1) + ". " + image_name, 'wb')
- else:
- if args.format:
- output_file = open(
- main_directory + "/" + dir_name + "/" + str(success_count + 1) + ". " + image_name + "." + args.format,
- 'wb')
- image_name = image_name + "." + args.format
- else:
- output_file = open(
- main_directory + "/" + dir_name + "/" + str(success_count + 1) + ". " + image_name + ".jpg", 'wb')
- image_name = image_name + ".jpg"
-
- data = response.read()
- output_file.write(data)
- response.close()
-
- print("Completed ====> " + str(success_count + 1) + ". " + image_name)
- k = k + 1
- success_count += 1
- if success_count == limit:
- break
-
- except UnicodeEncodeError as e:
- errorCount +=1
- print ("UnicodeEncodeError on an image...trying next one..." + " Error: " + str(e))
- k = k + 1
-
- except HTTPError as e: # If there is any HTTPError
- errorCount += 1
- print("HTTPError on an image...trying next one..." + " Error: " + str(e))
- k = k + 1
+ output_file = open(path, 'wb')
+ output_file.write(data)
+ output_file.close()
+ if save_source:
+ list_path = main_directory + "/" + save_source + ".txt"
+ list_file = open(list_path,'a')
+ list_file.write(path + '\t' + img_src + '\n')
+ list_file.close()
+ absolute_path = os.path.abspath(path)
+ except OSError as e:
+ download_status = 'fail'
+ download_message = "OSError on an image...trying next one..." + " Error: " + str(e)
+ return_image_name = ''
+ absolute_path = ''
+
+ #return image name back to calling method to use it for thumbnail downloads
+ download_status = 'success'
+ download_message = "Completed Image ====> " + prefix + str(count) + "." + image_name
+ return_image_name = prefix + str(count) + "." + image_name
+
+ # image size parameter
+ if not silent_mode:
+ if print_size:
+ print("Image Size: " + str(self.file_size(path)))
+
+ except UnicodeEncodeError as e:
+ download_status = 'fail'
+ download_message = "UnicodeEncodeError on an image...trying next one..." + " Error: " + str(e)
+ return_image_name = ''
+ absolute_path = ''
+
+ except URLError as e:
+ download_status = 'fail'
+ download_message = "URLError on an image...trying next one..." + " Error: " + str(e)
+ return_image_name = ''
+ absolute_path = ''
+
+ except BadStatusLine as e:
+ download_status = 'fail'
+ download_message = "BadStatusLine on an image...trying next one..." + " Error: " + str(e)
+ return_image_name = ''
+ absolute_path = ''
+
+ except HTTPError as e: # If there is any HTTPError
+ download_status = 'fail'
+ download_message = "HTTPError on an image...trying next one..." + " Error: " + str(e)
+ return_image_name = ''
+ absolute_path = ''
+
+ except URLError as e:
+ download_status = 'fail'
+ download_message = "URLError on an image...trying next one..." + " Error: " + str(e)
+ return_image_name = ''
+ absolute_path = ''
+
+ except ssl.CertificateError as e:
+ download_status = 'fail'
+ download_message = "CertificateError on an image...trying next one..." + " Error: " + str(e)
+ return_image_name = ''
+ absolute_path = ''
+
+ except IOError as e: # If there is any IOError
+ download_status = 'fail'
+ download_message = "IOError on an image...trying next one..." + " Error: " + str(e)
+ return_image_name = ''
+ absolute_path = ''
+
+ except IncompleteRead as e:
+ download_status = 'fail'
+ download_message = "IncompleteReadError on an image...trying next one..." + " Error: " + str(e)
+ return_image_name = ''
+ absolute_path = ''
+
+ return download_status,download_message,return_image_name,absolute_path
+
+
+ # Finding 'Next Image' from the given raw page
+ def _get_next_item(self,s):
+ start_line = s.find('rg_meta notranslate')
+ if start_line == -1: # If no links are found then give an error!
+ end_quote = 0
+ link = "no_links"
+ return link, end_quote
+ else:
+ start_line = s.find('class="rg_meta notranslate">')
+ start_object = s.find('{', start_line + 1)
+ end_object = s.find('</div>', start_object + 1)
+ object_raw = str(s[start_object:end_object])
+ #remove escape characters based on python version
+ version = (3, 0)
+ cur_version = sys.version_info
+ if cur_version >= version: #python3
+ try:
+ object_decode = bytes(object_raw, "utf-8").decode("unicode_escape")
+ final_object = json.loads(object_decode)
+ except:
+ final_object = ""
+ else: #python2
+ try:
+ final_object = (json.loads(self.repair(object_raw)))
+ except:
+ final_object = ""
+ return final_object, end_object
- except URLError as e:
- errorCount += 1
- print("URLError on an image...trying next one..." + " Error: " + str(e))
- k = k + 1
- except ssl.CertificateError as e:
+ # Getting all image objects with the help of '_get_next_item'
+ def _get_all_items(self,page,main_directory,dir_name,limit,arguments):
+ items = []
+ abs_path = []
+ errorCount = 0
+ i = 0
+ count = 1
+ while count < limit+1:
+ object, end_content = self._get_next_item(page)
+ if object == "no_links":
+ break
+ elif object == "":
+ page = page[end_content:]
+ elif arguments['offset'] and count < int(arguments['offset']):
+ count += 1
+ page = page[end_content:]
+ else:
+ #format the item for readability
+ object = self.format_object(object)
+ if arguments['metadata']:
+ if not arguments["silent_mode"]:
+ print("\nImage Metadata: " + str(object))
+
+ #download the images
+ download_status,download_message,return_image_name,absolute_path = self.download_image(object['image_link'],object['image_format'],main_directory,dir_name,count,arguments['print_urls'],arguments['socket_timeout'],arguments['prefix'],arguments['print_size'],arguments['no_numbering'],arguments['no_download'],arguments['save_source'],object['image_source'],arguments["silent_mode"],arguments["thumbnail_only"],arguments['format'],arguments['ignore_urls'])
+ if not arguments["silent_mode"]:
+ print(download_message)
+ if download_status == "success":
+
+ # download image_thumbnails
+ if arguments['thumbnail'] or arguments["thumbnail_only"]:
+ download_status, download_message_thumbnail = self.download_image_thumbnail(object['image_thumbnail_url'],main_directory,dir_name,return_image_name,arguments['print_urls'],arguments['socket_timeout'],arguments['print_size'],arguments['no_download'],arguments['save_source'],object['image_source'],arguments['ignore_urls'])
+ if not arguments["silent_mode"]:
+ print(download_message_thumbnail)
+
+ count += 1
+ object['image_filename'] = return_image_name
+ items.append(object) # Append the item's metadata to the 'items' list
+ abs_path.append(absolute_path)
+ else:
errorCount += 1
- print("CertificateError on an image...trying next one..." + " Error: " + str(e))
- k = k + 1
- except IOError as e: # If there is any IOError
- errorCount += 1
- print("IOError on an image...trying next one..." + " Error: " + str(e))
- k = k + 1
+ #delay param
+ if arguments['delay']:
+ time.sleep(int(arguments['delay']))
+
+ page = page[end_content:]
+ i += 1
+ if count < limit:
+ print("\n\nUnfortunately all " + str(
+ limit) + " could not be downloaded because some images were not downloadable. " + str(
+ count-1) + " is all we got for this search filter!")
+ return items,errorCount,abs_path
+
+
+ # Bulk Download
+ def download(self,arguments):
+ paths_agg = {}
+ # for input coming from other python files
+ if __name__ != "__main__":
+ # if the calling file contains config_file param
+ if 'config_file' in arguments:
+ records = []
+ json_file = json.load(open(arguments['config_file']))
+ for record in range(0, len(json_file['Records'])):
+ arguments = {}
+ for i in args_list:
+ arguments[i] = None
+ for key, value in json_file['Records'][record].items():
+ arguments[key] = value
+ records.append(arguments)
+ total_errors = 0
+ for rec in records:
+ paths, errors = self.download_executor(rec)
+ for i in paths:
+ paths_agg[i] = paths[i]
+ if not arguments["silent_mode"]:
+ if arguments['print_paths']:
+ print(paths.encode('raw_unicode_escape').decode('utf-8'))
+ total_errors = total_errors + errors
+ return paths_agg,total_errors
+ # if the calling file contains params directly
+ else:
+ paths, errors = self.download_executor(arguments)
+ for i in paths:
+ paths_agg[i] = paths[i]
+ if not arguments["silent_mode"]:
+ if arguments['print_paths']:
+ print(paths.encode('raw_unicode_escape').decode('utf-8'))
+ return paths_agg, errors
+ # for input coming from CLI
+ else:
+ paths, errors = self.download_executor(arguments)
+ for i in paths:
+ paths_agg[i] = paths[i]
+ if not arguments["silent_mode"]:
+ if arguments['print_paths']:
+ print(paths.encode('raw_unicode_escape').decode('utf-8'))
+ return paths_agg, errors
+
+ def download_executor(self,arguments):
+ paths = {}
+ errorCount = None
+ for arg in args_list:
+ if arg not in arguments:
+ arguments[arg] = None
+ ######Initialization and Validation of user arguments
+ if arguments['keywords']:
+ search_keyword = [str(item) for item in arguments['keywords'].split(',')]
+
+ if arguments['keywords_from_file']:
+ search_keyword = self.keywords_from_file(arguments['keywords_from_file'])
+
+ # both time and time range should not be allowed in the same query
+ if arguments['time'] and arguments['time_range']:
+ raise ValueError('Either time or time range should be used in a query. Both cannot be used at the same time.')
+
+ # both "size" and "exact_size" should not be allowed in the same query
+ if arguments['size'] and arguments['exact_size']:
+ raise ValueError('Either "size" or "exact_size" should be used in a query. Both cannot be used at the same time.')
+
+ # both image directory and no image directory should not be allowed in the same query
+ if arguments['image_directory'] and arguments['no_directory']:
+ raise ValueError('You can either specify image directory or specify no image directory, not both!')
+
+ # Additional words added to keywords
+ if arguments['suffix_keywords']:
+ suffix_keywords = [" " + str(sk) for sk in arguments['suffix_keywords'].split(',')]
+ else:
+ suffix_keywords = ['']
+
+ # Prefix words added before keywords
+ if arguments['prefix_keywords']:
+ prefix_keywords = [str(sk) + " " for sk in arguments['prefix_keywords'].split(',')]
+ else:
+ prefix_keywords = ['']
- if args.delay:
- time.sleep(int(delay_time))
+ # Setting limit on number of images to be downloaded
+ if arguments['limit']:
+ limit = int(arguments['limit'])
+ else:
+ limit = 100
+
+ if arguments['url']:
+ current_time = str(datetime.datetime.now()).split('.')[0]
+ search_keyword = [current_time.replace(":", "_")]
+
+ if arguments['similar_images']:
+ current_time = str(datetime.datetime.now()).split('.')[0]
+ search_keyword = [current_time.replace(":", "_")]
+
+ # If single_image or url argument not present then keywords is mandatory argument
+ if arguments['single_image'] is None and arguments['url'] is None and arguments['similar_images'] is None and \
+ arguments['keywords'] is None and arguments['keywords_from_file'] is None:
+ print('-------------------------------\n'
+ 'Uh oh! Keywords is a required argument \n\n'
+ 'Please refer to the documentation on guide to writing queries \n'
+ 'https://github.com/hardikvasa/google-images-download#examples'
+ '\n\nexiting!\n'
+ '-------------------------------')
+ sys.exit()
+
+ # If this argument is present, set the custom output directory
+ if arguments['output_directory']:
+ main_directory = arguments['output_directory']
+ else:
+ main_directory = "downloads"
+
+ # Proxy settings
+ if arguments['proxy']:
+ os.environ["http_proxy"] = arguments['proxy']
+ os.environ["https_proxy"] = arguments['proxy']
+ ######Initialization Complete
+ total_errors = 0
+ for pky in prefix_keywords: # 1.for every prefix keywords
+ for sky in suffix_keywords: # 2.for every suffix keywords
+ i = 0
+ while i < len(search_keyword): # 3.for every main keyword
+ iteration = "\n" + "Item no.: " + str(i + 1) + " -->" + " Item name = " + (pky) + (search_keyword[i]) + (sky)
+ if not arguments["silent_mode"]:
+ print(iteration.encode('raw_unicode_escape').decode('utf-8'))
+ print("Evaluating...")
+ else:
+ print("Downloading images for: " + (pky) + (search_keyword[i]) + (sky) + " ...")
+ search_term = pky + search_keyword[i] + sky
+
+ if arguments['image_directory']:
+ dir_name = arguments['image_directory']
+ elif arguments['no_directory']:
+ dir_name = ''
+ else:
+ dir_name = search_term + ('-' + arguments['color'] if arguments['color'] else '') #sub-directory
+
+ if not arguments["no_download"]:
+ self.create_directories(main_directory,dir_name,arguments['thumbnail'],arguments['thumbnail_only']) #create directories in OS
+
+ params = self.build_url_parameters(arguments) #building URL with params
+
+ url = self.build_search_url(search_term,params,arguments['url'],arguments['similar_images'],arguments['specific_site'],arguments['safe_search']) #building main search url
+
+ if limit < 101:
+ raw_html = self.download_page(url) # download page
+ else:
+ raw_html = self.download_extended_page(url,arguments['chromedriver'])
+
+ if not arguments["silent_mode"]:
+ if arguments['no_download']:
+ print("Getting URLs without downloading images...")
+ else:
+ print("Starting Download...")
+ items,errorCount,abs_path = self._get_all_items(raw_html,main_directory,dir_name,limit,arguments) #get all image items and download images
+ paths[pky + search_keyword[i] + sky] = abs_path
+
+ #dumps into a json file
+ if arguments['extract_metadata']:
+ try:
+ if not os.path.exists("logs"):
+ os.makedirs("logs")
+ except OSError as e:
+ print(e)
+ json_file = open("logs/"+search_keyword[i]+".json", "w")
+ json.dump(items, json_file, indent=4, sort_keys=True)
+ json_file.close()
+
+ #Related images
+ if arguments['related_images']:
+ print("\nGetting list of related keywords...this may take a few moments")
+ tabs = self.get_all_tabs(raw_html)
+ for key, value in tabs.items():
+ final_search_term = (search_term + " - " + key)
+ print("\nNow Downloading - " + final_search_term)
+ if limit < 101:
+ new_raw_html = self.download_page(value) # download page
+ else:
+ new_raw_html = self.download_extended_page(value,arguments['chromedriver'])
+ self.create_directories(main_directory, final_search_term,arguments['thumbnail'],arguments['thumbnail_only'])
+ self._get_all_items(new_raw_html, main_directory, search_term + " - " + key, limit,arguments)
- if success_count < limit:
- print("\n\nUnfortunately all " + str(limit) + " could not be downloaded because some images were not downloadable. " + str(success_count) + " is all we got for this search filter!")
- i = i + 1
- return errorCount
+ i += 1
+ total_errors = total_errors + errorCount
+ if not arguments["silent_mode"]:
+ print("\nErrors: " + str(errorCount) + "\n")
+ return paths, total_errors
#------------- Main Program -------------#
-if args.single_image: #Download Single Image using a URL
- single_image()
-else: # or download multiple images based on keywords/keyphrase search
+def main():
+ records = user_input()
+ total_errors = 0
t0 = time.time() # start the timer
- errorCount = bulk_download(search_keyword,suffix_keywords,limit,main_directory,delay_time,print_url)
-
- print("\nEverything downloaded!")
- print("Total Errors: " + str(errorCount) + "\n")
- t1 = time.time() # stop the timer
- total_time = t1 - t0 # Calculating the total time required to crawl, find and download all the links of 60,000 images
- print("Total time taken: " + str(total_time) + " Seconds")
-#--------End of the main program --------#
+ for arguments in records:
+
+ if arguments['single_image']: # Download Single Image using a URL
+ response = googleimagesdownload()
+ response.single_image(arguments['single_image'])
+ else: # or download multiple images based on keywords/keyphrase search
+ response = googleimagesdownload()
+ paths,errors = response.download(arguments) #wrapping response in a variable just for consistency
+ total_errors = total_errors + errors
+
+ t1 = time.time() # stop the timer
+ total_time = t1 - t0 # Calculating the total time required to crawl, find and download the images
+ if not arguments["silent_mode"]:
+ print("\nEverything downloaded!")
+ print("Total errors: " + str(total_errors))
+ print("Total time taken: " + str(total_time) + " Seconds")
+
+if __name__ == "__main__":
+ main()
# In[ ]:
diff --git a/google_images_download/sample_config.json b/google_images_download/sample_config.json
new file mode 100644
index 00000000..b259c9bb
--- /dev/null
+++ b/google_images_download/sample_config.json
@@ -0,0 +1,16 @@
+{
+ "Records": [
+ {
+ "keywords": "apple",
+ "limit": 5,
+ "color": "green",
+ "print_urls": true
+ },
+ {
+ "keywords": "universe",
+ "limit": 15,
+ "size": "large",
+ "print_urls": true
+ }
+ ]
+}
diff --git a/images/flow-chart.png b/images/flow-chart.png
new file mode 100644
index 00000000..cdcce313
Binary files /dev/null and b/images/flow-chart.png differ
diff --git a/requirements.txt b/requirements.txt
index e69de29b..954f0db0 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1 @@
+selenium
\ No newline at end of file
diff --git a/setup.cfg b/setup.cfg
index cb4a338e..c26b8326 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -2,4 +2,4 @@
universal=1
[metadata]
-description-file=README.md
\ No newline at end of file
+description-file=README.rst
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 449f920a..e0d8c80c 100644
--- a/setup.py
+++ b/setup.py
@@ -2,28 +2,13 @@
from codecs import open
from os import path
-__version__ = '1.0.2'
+__version__ = '2.8.0'
here = path.abspath(path.dirname(__file__))
# Get the long description from the README file
-try:
- import pypandoc
- if path.isfile('README.rst'):
- print("README.rst already exist.")
- print("NOT REFRESHING README.rst")
- else:
- long_description = pypandoc.convert_file('README.md', 'rst')
- with open("README.rst", "w") as f:
- f.write(long_description)
-
- with open('README.rst', encoding='utf-8') as f:
- long_description = f.read()
-except Exception as e:
- print("Error:{}:{}".format(type(e), e))
- print("NOT REFRESHING README.rst")
- with open('README.md', encoding='utf-8') as f:
- long_description = f.read()
+with open('README.rst', encoding='utf-8') as f:
+ long_description = f.read()
# get the dependencies and installs
with open(path.join(here, 'requirements.txt'), encoding='utf-8') as f:
@@ -41,24 +26,25 @@
download_url='https://github.com/hardikvasa/google-images-download/tarball/' + __version__,
license='MIT',
classifiers=[
- 'Development Status :: 3 - Alpha',
- 'Intended Audience :: Developers',
- 'Programming Language :: Python :: 2.7',
- 'Programming Language :: Python :: 3.3',
- 'Programming Language :: Python :: 3.4',
- 'Programming Language :: Python :: 3.5',
- 'Programming Language :: Python :: 3.6'
+ 'Development Status :: 4 - Beta',
+ 'Intended Audience :: Developers',
+ 'License :: OSI Approved :: MIT License',
+ 'Programming Language :: Python :: 2.7',
+ 'Programming Language :: Python :: 3.3',
+ 'Programming Language :: Python :: 3.4',
+ 'Programming Language :: Python :: 3.5',
+ 'Programming Language :: Python :: 3.6',
],
- keywords='',
+ keywords='google images download save filter color image-search image-dataset image-scrapper image-gallery terminal command-line',
packages=find_packages(exclude=['docs', 'tests*']),
include_package_data=True,
author='Hardik Vasa',
install_requires=install_requires,
dependency_links=dependency_links,
- author_email='psuzzn@gmail.com',
+ author_email='hnvasa@gmail.com',
entry_points={
'console_scripts': [
- 'googleimagesdownload = google_images_download.__init__:main'
+ 'googleimagesdownload = google_images_download.google_images_download:main'
]},
)
diff --git a/tests/test_google_images_download.py b/tests/test_google_images_download.py
new file mode 100644
index 00000000..ec62afd0
--- /dev/null
+++ b/tests/test_google_images_download.py
@@ -0,0 +1,53 @@
+from google_images_download import google_images_download
+import os, errno
+import time
+
+
+def silent_remove_of_file(file):
+ try:
+ os.remove(file)
+ except OSError as e:
+ if e.errno != errno.ENOENT:
+ raise e
+ return False
+ return True
+
+
+def test_download_images_to_default_location():
+ start_time = time.time()
+ argumnets = {
+ "keywords": "Polar bears",
+ "limit": 5,
+ "print_urls": False
+ }
+ try:
+ temp = argumnets['output_folder']
+ except KeyError:
+ pass
+ else:
+ assert False, "This test checks download to default location yet an output folder was provided"
+
+ output_folder_path = os.path.join(os.path.realpath('.'), 'downloads', '{}'.format(argumnets['keywords']))
+ if os.path.exists(output_folder_path):
+ start_amount_of_files_in_output_folder = len([name for name in os.listdir(output_folder_path) if os.path.isfile(os.path.join(output_folder_path, name)) and os.path.getctime(os.path.join(output_folder_path, name)) < start_time])
+ else:
+ start_amount_of_files_in_output_folder = 0
+
+ response = google_images_download.googleimagesdownload()
+ response.download(argumnets)
+ files_modified_after_test_started = [name for name in os.listdir(output_folder_path) if os.path.isfile(os.path.join(output_folder_path, name)) and os.path.getmtime(os.path.join(output_folder_path, name)) > start_time]
+ end_amount_of_files_in_output_folder = len(files_modified_after_test_started)
+ print(f"Files downloaded by test {__name__}:")
+ for file in files_modified_after_test_started:
+ print(os.path.join(output_folder_path, file))
+
+
+ # assert end_amount_of_files_in_output_folder - start_amount_of_files_in_output_folder == argumnets['limit']
+ assert end_amount_of_files_in_output_folder == argumnets['limit']
+
+ print(f"Cleaning up all files downloaded by test {__name__}...")
+ for file in files_modified_after_test_started:
+ if silent_remove_of_file(os.path.join(output_folder_path, file)):
+ print(f"Deleted {os.path.join(output_folder_path, file)}")
+ else:
+ print(f"Failed to delete {os.path.join(output_folder_path, file)}")
\ No newline at end of file
diff --git a/update_docs.sh b/update_docs.sh
deleted file mode 100644
index a092a3a4..00000000
--- a/update_docs.sh
+++ /dev/null
@@ -1,26 +0,0 @@
-#!/usr/bin/env bash
-
-# build the docs
-cd docs
-make clean
-make html
-cd ..
-
-# commit and push
-git add -A
-git commit -m "building and pushing docs"
-git push origin master
-
-# switch branches and pull the data we want
-git checkout gh-pages
-rm -rf .
-touch .nojekyll
-git checkout master docs/build/html
-mv ./docs/build/html/* ./
-rm -rf ./docs
-git add -A
-git commit -m "publishing updated docs..."
-git push origin gh-pages
-
-# switch back
-git checkout master
\ No newline at end of file