-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathscript.py
More file actions
73 lines (66 loc) · 2.17 KB
/
script.py
File metadata and controls
73 lines (66 loc) · 2.17 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import requests
from lxml import html
import os
# Make sure the download directory exists before anything is fetched.
try:
    os.mkdir("Images")
except OSError:
    # An already-existing directory is the expected, harmless case.  Any
    # other failure (no write permission, a regular file named "Images", ...)
    # would make every later download fail, so surface it immediately.
    if not os.path.isdir("Images"):
        raise
# Change the proxy address here...
# The same proxy endpoint is used for every URL scheme.
proxies = dict.fromkeys(("http", "https", "ftp"), "http://10.3.100.207:8080")

# Headers passed along with each request made by this script.
headers = {
    "Connection": "keep-alive",
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 6.3; WOW64; rv:27.0) "
        "Gecko/20100101 Firefox/34.0"
    ),
}
# Interactive Google Images downloader: asks for a search query and a size
# filter, scrapes the result page for links, and saves up to 100 images into
# the "Images" directory.  Written to run unchanged on Python 2 and Python 3.

try:
    read_line = raw_input  # Python 2
except NameError:
    read_line = input  # Python 3: input() already returns the raw string

GOOGLE_ROOT = "https://www.google.com"
SEARCH_URL = GOOGLE_ROOT + "/search"
MAX_IMAGES = 100
REQUEST_TIMEOUT = 30  # seconds; without it a stalled connection hangs forever


def _absolute_url(link):
    """Return *link* as-is when it is an http(s) URL, else prefix the Google host."""
    if link.split("/", 1)[0] in ("http:", "https:"):
        return link
    return GOOGLE_ROOT + link


def _fetch(url, params=None):
    """GET *url* through the configured proxy and return the response.

    Raises a ``requests`` exception on network failure, timeout, or an HTTP
    error status, so an error page is never mistaken for real content.
    """
    # NOTE(review): verify=False disables TLS certificate checking.  It is
    # kept from the original (presumably required by the proxy) -- confirm,
    # and remove it if the proxy does not intercept HTTPS.
    response = requests.get(
        url,
        params=params,
        proxies=proxies,
        headers=headers,
        verify=False,
        timeout=REQUEST_TIMEOUT,
    )
    response.raise_for_status()
    return response


def _download_result(link, query, number):
    """Save the image behind one search-result link.

    Returns True when the image was written to ``Images/<query>_<number>.<ext>``
    and False when the final image URL does not end in a three-character
    extension.  Raises on any network, parsing, or file error.
    """
    landing = _fetch(_absolute_url(link))
    image_urls = html.fromstring(landing.content).xpath(
        '//meta[@itemprop="image"]/@content'
    )
    if not image_urls:
        raise LookupError("result page has no image meta tag")

    image = _fetch(_absolute_url(image_urls[0]))
    # Use the final URL (after redirects) to pick the file extension.
    ext = image.url.split(".")[-1]
    if len(ext) != 3:
        return False

    path = os.path.join("Images", "%s_%d.%s" % (query, number, ext))
    with open(path, "wb") as out:
        out.write(image.content)
    return True


def main():
    """Prompt for a query and size, then download the matching images."""
    query = read_line("Please Enter the Search Query for which you want to download 100 images from Google: ")
    size = read_line("Enter l for Large Size Image, m for Medium, i for icon size and n for no preference: ")

    # Let requests build the query string so spaces become "+" and characters
    # such as "&" or "#" in the query are percent-encoded instead of
    # corrupting the URL.
    params = {"tbm": "isch", "q": query}
    if size != "n":
        params["tbs"] = "isz:" + size

    search_page = _fetch(SEARCH_URL, params=params)
    result_links = html.fromstring(search_page.content).xpath(
        '//a[@class="rg_l"]/@href'
    )
    print("List of temp links has been compiled in a single list")

    wanted = result_links[:MAX_IMAGES]
    for number, link in enumerate(wanted, 1):
        try:
            saved = _download_result(link, query, number)
        except Exception as exc:
            # Best effort: one bad result must not stop the run.  Catching
            # Exception (not a bare except) keeps Ctrl-C working, and the
            # reason is reported instead of being silently discarded.
            print("Encountered an error skipping %d of 100 (%r)" % (number, exc))
        else:
            if saved:
                print("Got Image %d of 100" % number)
            else:
                print("Skipping file %d of 100 because it is invalid" % number)

    if len(wanted) < MAX_IMAGES:
        print("Only %d of 100 result links were found" % len(wanted))


if __name__ == "__main__":
    main()