import array as arr  # kept: present in the original import list (unused here, may be used elsewhere)
import os
import time
import urllib
import urllib.request  # explicit: `import urllib` alone does not guarantee the `request` submodule is loaded
from urllib.parse import quote

import requests

class simple_image_download:
    """Scrape Google Images search results and download matching image files.

    The lowercase class name is kept for backward compatibility with existing
    callers (`simple_image_download()`), even though PEP 8 prefers PascalCase.
    """

    def __init__(self):
        pass

    def urls(self, keywords, limit):
        """Return up to `limit` image URLs for each comma-separated keyword.

        keywords -- comma-separated search terms, e.g. "cats, dogs"
        limit    -- maximum number of links collected per keyword
        Returns a flat list of URL strings (all keywords concatenated).
        """
        links = []
        for keyword in self._split_keywords(keywords):
            raw_html = self._download_page(self._build_search_url(keyword))
            found = 0
            for link in self._iter_image_links(raw_html):
                links.append(link)
                found += 1
                if found >= limit:
                    break
        return links

    def download(self, keywords, limit):
        """Download up to `limit` images per keyword into simple_images/<keyword>/.

        Failed downloads are reported via print and skipped without counting
        toward `limit` (matching the original behaviour), but each URL is
        attempted at most once -- the old retry pattern (`j -= 1` on failure)
        could loop forever on a persistently failing page.
        """
        main_directory = "simple_images/"
        for keyword in self._split_keywords(keywords):
            self._create_directories(main_directory, keyword)
            raw_html = self._download_page(self._build_search_url(keyword))

            path = main_directory + keyword
            if not os.path.exists(path):
                os.makedirs(path)

            downloaded = 0
            for link in self._iter_image_links(raw_html):
                if downloaded >= limit:
                    break
                filename = str(keyword) + "_" + str(downloaded + 1) + ".jpg"
                try:
                    # timeout so one stuck server cannot hang the whole run
                    response = requests.get(link, allow_redirects=True, timeout=30)
                    # context manager: the old open(...).write(...) leaked the handle
                    with open(os.path.join(path, filename), 'wb') as out_file:
                        out_file.write(response.content)
                except Exception as e:
                    print(e)
                    continue  # skip this URL; it does not count toward `limit`
                downloaded += 1

    def _split_keywords(self, keywords):
        """Split a comma-separated keyword string into stripped terms."""
        return [str(item).strip() for item in keywords.split(',')]

    def _build_search_url(self, keyword):
        """Build the Google Images search URL for one keyword."""
        return ('https://www.google.com/search?q='
                + quote(keyword.encode('utf-8'))
                + '&biw=1536&bih=674&tbm=isch&sxsrf=ACYBGNSXXpS6YmAKUiLKKBs6xWb4uUY5gA:1581168823770&source=lnms&sa=X&ved=0ahUKEwioj8jwiMLnAhW9AhAIHbXTBMMQ_AUI3QUoAQ')

    def _iter_image_links(self, raw_html):
        """Yield image URLs embedded in raw_html.

        raw_html comes from str(resp.read()) -- the repr of the page *bytes* --
        so escape sequences appear literally (e.g. \\u003d); a URL is truncated
        at the first backslash.  Stops cleanly when no further '"https://'
        marker exists: the old scanner spun forever once the page was
        exhausted, because str.find() returning -1 restarted the search at
        index 0 and the terminating `break` was never reached.
        """
        extensions = ('.jpg', '.png', '.ico', '.gif', '.jpeg')
        end_object = -1
        while True:
            new_line = raw_html.find('"https://', end_object + 1)
            if new_line == -1:
                return  # page exhausted
            end_object = raw_html.find('"', new_line + 1)
            if end_object == -1:
                return  # unterminated quote: nothing more to parse
            cut = raw_html.find('\\', new_line + 1, end_object)
            link = raw_html[new_line + 1:cut if cut != -1 else end_object]
            # NOTE(review): the original tested bare 'png' (no dot), which also
            # matched 'png' anywhere in a URL; '.png' is assumed to be the intent.
            if any(ext in link for ext in extensions):
                yield link

    def _create_directories(self, main_directory, name):
        """Ensure main_directory and main_directory/name both exist.

        The original duplicated the sub-directory creation in both branches of
        an if/else that did the identical thing; the branches are merged here.
        Raises OSError for anything other than "already exists".
        """
        try:
            if not os.path.exists(main_directory):
                os.makedirs(main_directory)
                time.sleep(0.2)  # as in the original -- presumably settling time; TODO confirm still needed
            sub_directory = os.path.join(main_directory, name)
            if not os.path.exists(sub_directory):
                os.makedirs(sub_directory)
        except OSError as e:
            if e.errno != 17:  # 17 == EEXIST: lost a race with another creator; harmless
                raise

    def _download_page(self, url):
        """Fetch `url` pretending to be a desktop Chrome browser.

        Returns str(resp.read()) -- the repr of the response *bytes*.  The
        link scanner depends on the literal backslash escapes this produces,
        so the body is deliberately not decoded.

        NOTE(review): on any failure this prints the error and exits the
        process (kept for backward compatibility); raising would be friendlier
        for a library -- consider changing with callers' agreement.
        """
        try:
            headers = {
                'User-Agent': "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 "
                              "(KHTML, like Gecko) Chrome/80.0.3987.87 Safari/537.36",
            }
            req = urllib.request.Request(url, headers=headers)
            # context manager closes the connection; the old code leaked it
            with urllib.request.urlopen(req) as resp:
                return str(resp.read())
        except Exception as e:
            print(e)
            exit(0)