Skip to content

Commit 6922ab3

Browse files
committed
remove chardet
1 parent 07541b8 commit 6922ab3

File tree

1 file changed

+13
-12
lines changed

1 file changed

+13
-12
lines changed

7.cool18-Article2epub-multithread/c2epub.py

Lines changed: 13 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,6 @@
1010
import urllib
1111

1212
import bs4
13-
import chardet
1413
import html2epub
1514
import requests
1615

@@ -23,7 +22,7 @@
2322
"waitPackage": "no",
2423
"autoDelete": "yes",
2524
"verifyCert": "yes",
26-
"threads": 3
25+
"threads": 5
2726
}
2827

2928

@@ -38,8 +37,10 @@ def find_all(a_str, sub):
3837

3938

4039
def to_str(bytes_or_str):
41-
codec = chardet.detect(bytes_or_str)
42-
value = bytes_or_str.decode(encoding=codec['encoding'])
40+
try:
41+
value = bytes_or_str.decode(encoding="UTF-8")
42+
except:
43+
value = bytes_or_str.decode(encoding="GBK")
4344
return value
4445

4546

@@ -126,7 +127,7 @@ def loadConfig():
126127
pass
127128

128129

129-
def download(url,threadname):
130+
def download(url, threadname):
130131

131132
uri = urllib.parse.urlparse(url)
132133
params = urllib.parse.parse_qs(uri.query)
@@ -137,7 +138,7 @@ def download(url,threadname):
137138

138139
src = fetch(url)
139140
title = extract_title(src, full=True)
140-
print(f'{threadname}:GOT {title}')
141+
print(f'{threadname}:PROC {title}')
141142
# REMOVE BLANKS
142143
raw = str(src)
143144

@@ -183,7 +184,7 @@ def download(url,threadname):
183184
downloading.pop()
184185
# SKIP DOWNLOADED FILES
185186
if (os.path.exists("%s-%s.html" % (tid, title))):
186-
print(f"{threadname}:SKP {tid}-{title}.html" , file=sys.stderr)
187+
print(f"{threadname}:SKIP {tid}-{title}.html", file=sys.stderr)
187188
return
188189

189190
[s.extract() for s in content_soup('script')]
@@ -219,9 +220,9 @@ def download(url,threadname):
219220
file.write(page_content)
220221
file.write(r"</p></body></html>")
221222
except:
222-
print(f"{threadname}:Error writing {title}", file=sys.stderr)
223+
print(f"{threadname}:ERR CAN'T WRITE {title}", file=sys.stderr)
223224
else:
224-
print(f'{threadname}:IGN {title}')
225+
print(f'{threadname}:PASS {title}')
225226
# add to downloaded
226227
downloaded.add(url)
227228

@@ -231,7 +232,7 @@ def __init__(self, name, q):
231232
threading.Thread.__init__(self)
232233
self.name = name
233234
self.q = q
234-
self.daemon=True
235+
self.daemon = True
235236

236237
def run(self):
237238
while not exitflag:
@@ -242,7 +243,7 @@ def run(self):
242243
threadlock.release()
243244
if (url):
244245
downloading.append(url)
245-
download(url,self.name)
246+
download(url, self.name)
246247

247248

248249
workqueue = queue.Queue()
@@ -259,7 +260,7 @@ def run(self):
259260
if (args_length > 1):
260261
url = sys.argv[1]
261262
if (not url):
262-
url = str(input("请粘贴cool18站的文章网址:"))
263+
url = str(input("#请粘贴cool18站的文章网址:"))
263264
loadConfig()
264265
pypath = sys.argv[0]
265266
pydir = os.getcwd()

0 commit comments

Comments
 (0)