1818 "proxy" : "socks5://127.0.0.1:1081" ,
1919 "minContent" : 1000 ,
2020 "waitPackage" : "no" ,
21- "autoDelete" :"yes"
21+ "autoDelete" : "yes" ,
22+ "verifyCert" : "yes"
2223}
2324
24-
2525def to_str (bytes_or_str ):
2626 if isinstance (bytes_or_str , bytes ):
2727 value = bytes_or_str .decode ('utf-8' )
@@ -39,11 +39,12 @@ def to_bytes(bytes_or_str):
 
 
 def fetch(url):
+
     if config['enableProxy'] == 'yes':
         proxy = config['proxy']
         proxies = dict(http=proxy, https=proxy)
         try:
-            resp = requests.get(url, proxies=proxies)
+            resp = requests.get(url, proxies=proxies, verify=verifySSLCert)
             src = to_str(resp.content)
             return src
         finally:
@@ -62,19 +63,22 @@ def fetch(url):
 L_START = '''<a name="followups" style=''>'''
 L_END = '''<a name="postfp">'''
 
-def extract_title(content,full=False):
+
+def extract_title(content, full=False):
     title_left = content.find('<title>')+len('<title>')
     title_right = content.find('</title>')
     title = content[title_left:title_right]
 
     if (full):
-        title = title.replace(" - cool18.com", "").replace("/","-").replace("\\","-").strip()
+        title = title.replace(" - cool18.com", "").replace("/",
                                                            "-").replace("\\", "-").strip()
     else:
         title_search = re.search('[【《](.*)[】》]', title, re.IGNORECASE)
         if title_search:
             title = title_search.group(1)
         else:
-            title = title.replace(" - cool18.com", "").replace("/","-").replace("\\","-").strip()
+            title = title.replace(
+                " - cool18.com", "").replace("/", "-").replace("\\", "-").strip()
 
     return title
 
@@ -87,10 +91,12 @@ def loadConfig():
         config['proxy'] = cf.get('network', 'proxy')
         config['minContent'] = cf.get('config', 'minContent')
         config['waitPackage'] = cf.get('config', 'waitPackage')
+        config['verifyCert'] = cf.get('network', 'verifyCert')
     except:
         pass
 
 
+
 def download(url):
     if not (config['host'] in url):
         return
@@ -102,7 +108,7 @@ def download(url):
         return
 
     src = fetch(url)
-    title = extract_title(src,full=True)
+    title = extract_title(src, full=True)
     print('+%s' % title)
 
     # REMOVE BLANKS
@@ -155,7 +161,7 @@ def download(url):
     [s.extract() for s in content_soup('script')]
 
     page_content = str(content_soup.find('body').getText())
-    page_content = page_content.replace("\n","")
+    page_content = page_content.replace("\n", "")
     page_content = page_content.replace(
         'cool18.com', '\n').replace('www.6park.com', '').replace('6park.com', '').replace("\n", "</p><p>").replace("<p></p>", "")
     try:
@@ -178,17 +184,18 @@ def download(url):
         print("Error writing %s" % title)
 
 
-
-
-
 # Main Logic
 if __name__ == '__main__':
+    verifySSLCert = True
     parser = argparse.ArgumentParser(
         description="Download articles from cool18.com then generate epub.")
-    parser.add_argument("url",type=str, help="a cool18.com article URL.")
+    parser.add_argument("url", type=str, help="a cool18.com article URL.")
     args = parser.parse_args()
     loadConfig()
-
+    if config['verifyCert'] == 'yes':
+        verifySSLCert = True
+    else:
+        verifySSLCert = False
     pypath = sys.argv[0]
     pydir = os.getcwd()
 
@@ -224,7 +231,7 @@ def download(url):
         epub.add_chapter(chap)
     epubpath = epub.create_epub(pydir)
     print(">OK, epub generated at: %s" % epubpath)
-    
+
     if config['autoDelete'] == 'yes':
         os.chdir("..")
         print(">Deleting Directory: %s" % title)
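
For context: the substance of this commit is the new `verifyCert` option, read from the `network` section of the config file (defaulting to "yes" in the config dict above) and passed to `requests.get` as the `verify` keyword; the remaining hunks are autopep8-style whitespace cleanup. A minimal standalone sketch of the same pattern, assuming a hypothetical target URL (this is not the repository's code):

import requests
import urllib3

# Equivalent of verifyCert = "no" in the config above.
verify_ssl_cert = False

if not verify_ssl_cert:
    # requests emits an InsecureRequestWarning for every request made with
    # verify=False; silence it once, since skipping verification is deliberate.
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

resp = requests.get("https://example.com/", verify=verify_ssl_cert, timeout=10)
print(resp.status_code)

Note that `fetch` reads `verifySSLCert` as a module-level global that is only assigned under `if __name__ == '__main__':`, so it is defined when the script is run directly but not when the module is imported.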