Skip to content

Commit c5b2cd6

Browse files
committed
Fix bug for Google safe mode; Fix bug when creating directories according to keywords;
1 parent c214834 commit c5b2cd6

File tree

5 files changed

+34
-7
lines changed

5 files changed

+34
-7
lines changed

crawler.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929

3030

3131
def google_gen_query_url(keywords, face_only=False, safe_mode=False):
32-
base_url = "https://www.google.com/search?tbm=isch"
32+
base_url = "https://www.google.com/search?tbm=isch&hl=en"
3333
keywords_str = "&q=" + quote(keywords)
3434
query_url = base_url + keywords_str
3535
if face_only is True:
@@ -42,7 +42,7 @@ def google_gen_query_url(keywords, face_only=False, safe_mode=False):
4242

4343

4444
def google_image_url_from_webpage(driver):
45-
time.sleep(10)
45+
# time.sleep(10)
4646
image_elements = driver.find_elements_by_class_name("rg_l")
4747
image_urls = list()
4848
url_pattern = "imgurl=\S*&imgrefurl"

downloader.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,12 @@
1212

1313

1414
headers = {
15-
'Connection': 'close',
16-
'User-Agent': 'Chrome/54.0.2840.100'
15+
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
16+
"Proxy-Connection": "keep-alive",
17+
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
18+
"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36",
19+
"Accept-Encoding": "gzip, deflate, sdch",
20+
# 'Connection': 'close',
1721
}
1822

1923

@@ -59,6 +63,7 @@ def download_images(image_urls, dst_dir, file_prefix="img", concurrency=50, time
5963
:param concurrency: number of requests process simultaneously
6064
:return: none
6165
"""
66+
6267
with concurrent.futures.ThreadPoolExecutor(max_workers=concurrency) as executor:
6368
futures = list()
6469
count = 0

mainwindow.ui

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -737,6 +737,9 @@ per keywords</string>
737737
<property name="text">
738738
<string>HTTP</string>
739739
</property>
740+
<property name="checked">
741+
<bool>true</bool>
742+
</property>
740743
<attribute name="buttonGroup">
741744
<string notr="true">buttonGroup_2</string>
742745
</attribute>
@@ -762,7 +765,7 @@ per keywords</string>
762765
<string>Socks5</string>
763766
</property>
764767
<property name="checked">
765-
<bool>true</bool>
768+
<bool>false</bool>
766769
</property>
767770
<attribute name="buttonGroup">
768771
<string notr="true">buttonGroup_2</string>
@@ -776,6 +779,15 @@ per keywords</string>
776779
<pointsize>12</pointsize>
777780
</font>
778781
</property>
782+
<property name="toolTip">
783+
<string>input ip:port</string>
784+
</property>
785+
<property name="statusTip">
786+
<string>xxx.xxx.xxx.xx:port</string>
787+
</property>
788+
<property name="placeholderText">
789+
<string>xxx.xxx.xxx.xx:port</string>
790+
</property>
779791
</widget>
780792
</item>
781793
</layout>

ui_mainwindow.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -416,6 +416,7 @@ def setupUi(self, MainWindow):
416416
font.setPointSize(12)
417417
self.radioButton_http.setFont(font)
418418
self.radioButton_http.setFocusPolicy(QtCore.Qt.TabFocus)
419+
self.radioButton_http.setChecked(True)
419420
self.radioButton_http.setObjectName(_fromUtf8("radioButton_http"))
420421
self.buttonGroup_2 = QtGui.QButtonGroup(MainWindow)
421422
self.buttonGroup_2.setObjectName(_fromUtf8("buttonGroup_2"))
@@ -431,7 +432,7 @@ def setupUi(self, MainWindow):
431432
font.setPointSize(12)
432433
self.radioButton_socks5.setFont(font)
433434
self.radioButton_socks5.setFocusPolicy(QtCore.Qt.TabFocus)
434-
self.radioButton_socks5.setChecked(True)
435+
self.radioButton_socks5.setChecked(False)
435436
self.radioButton_socks5.setObjectName(_fromUtf8("radioButton_socks5"))
436437
self.buttonGroup_2.addButton(self.radioButton_socks5)
437438
self.horizontalLayout_3.addWidget(self.radioButton_socks5)
@@ -557,6 +558,9 @@ def retranslateUi(self, MainWindow):
557558
self.checkBox_proxy.setText(_translate("MainWindow", "&Proxy:", None))
558559
self.radioButton_http.setText(_translate("MainWindow", "HTTP", None))
559560
self.radioButton_socks5.setText(_translate("MainWindow", "Socks5", None))
561+
self.lineEdit_proxy.setToolTip(_translate("MainWindow", "input ip:port", None))
562+
self.lineEdit_proxy.setStatusTip(_translate("MainWindow", "xxx.xxx.xxx.xx:port", None))
563+
self.lineEdit_proxy.setPlaceholderText(_translate("MainWindow", "xxx.xxx.xxx.xx:port", None))
560564
self.menuAbout.setTitle(_translate("MainWindow", "Help", None))
561565
self.actionAbout.setText(_translate("MainWindow", "About", None))
562566

utils.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,12 @@
11
# -*- coding: utf-8 -*-
22

33

4+
def gen_valid_dir_name_for_keywords(keywords):
5+
keep = ["-", "_", "."]
6+
keywords = keywords.replace(" ", "_").replace(":", "-")
7+
return "".join(c for c in keywords if c.isalnum() or c in keep).rstrip()
8+
9+
410
class AppConfig(object):
511
def __init__(self):
612
self.engine = "Google"
@@ -29,7 +35,7 @@ def to_command_paras(self):
2935

3036
str_paras += ' -j ' + str(self.num_threads)
3137

32-
str_paras += ' -o "' + self.output_dir + '/' + self.keywords + '"'
38+
str_paras += ' -o "' + self.output_dir + '/' + gen_valid_dir_name_for_keywords(self.keywords) + '"'
3339

3440
if self.face_only:
3541
str_paras += ' -F '

0 commit comments

Comments
 (0)