2222 "verifyCert" : "yes"
2323}
2424
25+
2526def to_str (bytes_or_str ):
2627 if isinstance (bytes_or_str , bytes ):
2728 value = bytes_or_str .decode ('utf-8' )
@@ -44,7 +45,8 @@ def fetch(url):
4445 proxy = config ['proxy' ]
4546 proxies = dict (http = proxy , https = proxy )
4647 try :
47- resp = requests .get (url , proxies = proxies , verify = verifySSLCert )
48+ resp = requests .get (url , proxies = proxies , verify = (
49+ config ['verifyCert' ] == 'yes' ))
4850 src = to_str (resp .content )
4951 return src
5052 finally :
@@ -92,11 +94,11 @@ def loadConfig():
9294 config ['minContent' ] = cf .get ('config' , 'minContent' )
9395 config ['waitPackage' ] = cf .get ('config' , 'waitPackage' )
9496 config ['verifyCert' ] = cf .get ('network' , 'verifyCert' )
97+ requests .packages .urllib3 .disable_warnings ()
9598 except :
9699 pass
97100
98101
99-
100102def download (url ):
101103 if not (config ['host' ] in url ):
102104 return
@@ -124,7 +126,7 @@ def download(url):
124126 links = content_soup .find_all ('a' )
125127 for a in links :
126128 _title = a .getText ()
127- print (_title )
129+ # print('+%s' % _title)
128130 _url = a .get ('href' )
129131
130132 if (_url and len (_url .strip ()) > 8 ):
@@ -144,7 +146,7 @@ def download(url):
144146 _title = a .getText ()
145147 if ('银元奖励' in _title ) or ('无内容' in _title ) or ('版块基金' in _title ) or (' 给 ' in _title ) or ('幸运红包' in _title ):
146148 continue
147- print ('+%s' % _title )
149+ # print('+%s' % _title)
148150 _u = a .get ('href' )
149151 if (_u and _u .startswith ("http" )):
150152 hive .append (_u )
@@ -155,7 +157,7 @@ def download(url):
155157
156158 # SKIP DOWNLOADED FILES
157159 if (os .path .exists ("%s-%s.html" % (tid , title ))):
158- print ("#%s-%s.html already exists." % (tid , title ))
160+ print ("#%s-%s.html already exists." % (tid , title ), file = sys . stderr )
159161 return
160162
161163 [s .extract () for s in content_soup ('script' )]
@@ -174,28 +176,22 @@ def download(url):
174176 try :
175177 with open ("%s-%s.html" % (tid , title ), 'w+' , encoding = 'utf-8' , errors = 'ignore' ) as file :
176178 file .write (
177- '<?xml version="1.0" encoding="utf-8"?><!DOCTYPE html><html><head><META HTTP-EQUIV="content-type" CONTENT="text/ html; charset=utf-8"> <title>' )
179+ r '<?xml version="1.0" encoding="utf-8" standalone="no" ?><!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">< html xmlns="http://www.w3.org/1999/xhtml" xml:lang="zh-CN"><head> <title>' )
178180 file .write (title )
179- file .write ("</title></head><body><pre ><p>" )
181+ file .write (r "</title></head><body><p>" )
180182 file .write (page_content )
181- file .write ("</p></pre></body></html>" )
182- print ('>Done' )
183+ file .write (r"</p></body></html>" )
183184 except :
184- print ("Error writing %s" % title )
185+ print ("Error writing %s" % title , file = sys . stderr )
185186
186187
187188# Main Logic
188189if __name__ == '__main__' :
189- verifySSLCert = True
190190 parser = argparse .ArgumentParser (
191- description = "Download articles from cool18.com then generate epub." )
192- parser .add_argument ("url" , type = str , help = "a cool18.com article URL." )
191+ description = r "Download articles from cool18.com then generate epub." )
192+ parser .add_argument ("url" , type = str , help = r "a cool18.com article URL." )
193193 args = parser .parse_args ()
194194 loadConfig ()
195- if config ['verifyCert' ] == 'yes' :
196- verifySSLCert = True
197- else :
198- verifySSLCert = False
199195 pypath = sys .argv [0 ]
200196 pydir = os .getcwd ()
201197
@@ -215,11 +211,12 @@ def download(url):
215211 while hive :
216212 current_url = hive .pop ()
217213 if (current_url in downloaded ):
218- print ( "-%s " % current_url )
214+ pass
219215 else :
220- print ("~[%d] %s" % (len (hive ), current_url ))
221- downloaded .add (current_url )
216+ print (r"~[%3d]%s" % (len (hive ), current_url ))
222217 download (current_url )
218+ downloaded .add (current_url )
219+
223220 if config ['waitPackage' ] == 'yes' :
224221 input ('>Press Enter when ready...' )
225222
0 commit comments