@@ -8,6 +8,7 @@
 import re
 import concurrent.futures
 from urllib.parse import urlparse, urljoin
+from urllib3.exceptions import InsecureRequestWarning
 from zipfile import ZipFile, ZIP_DEFLATED
 from random import shuffle, uniform
 from time import sleep
@@ -18,7 +19,7 @@
 class Comic:
     """Comic class. Contains chapters."""
 
-    def __init__(self, comic_url, program_args):
+    def __init__(self, comic_url, program_args, verify_https):
         """Init function. Creates chapters for the given comic."""
         self.url = comic_url
         self.name = comic_url.split('/')[-1] \
@@ -34,6 +35,8 @@ def __init__(self, comic_url, program_args):
         self.wait_time = program_args.waittime
         self.max_retries = program_args.retries
         self.file_format = program_args.format
+        # Set verify mode
+        self.verify_https = verify_https
         # Get all chapters and mode of download
         self.all_chapters = self.get_chapters()
 
@@ -95,7 +98,7 @@ def manga_extract_chapters(self):
         urlscheme = urlparse(url)
 
         # Get chapters
-        r = requests.get(url)
+        r = requests.get(url, verify=self.verify_https)
         soup = bsoup.BeautifulSoup(r.text, 'html.parser')
 
         chapters = defaultdict(Chapter)
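This hunk and the ones that follow all thread the same flag into requests. For reference, a minimal sketch of what the `verify` keyword does (example.com stands in for a real site):

import requests
from urllib3.exceptions import InsecureRequestWarning

# verify=True (the requests default) validates the server's TLS
# certificate against the trusted CA bundle; a certificate that fails
# validation raises requests.exceptions.SSLError.
r = requests.get('https://example.com', verify=True)

# verify=False skips certificate validation; urllib3 then emits an
# InsecureRequestWarning per request unless it is silenced first.
requests.packages.urllib3.disable_warnings(category=InsecureRequestWarning)
r = requests.get('https://example.com', verify=False)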
@@ -123,7 +126,7 @@ def comic_extract_chapters(self):
         """Extract chapters if it is a comic."""
         url = self.url
         comic = url.split('/')[-1]
-        r = requests.get(url)
+        r = requests.get(url, verify=self.verify_https)
         soup = bsoup.BeautifulSoup(r.text, 'html.parser')
         volume_num = 1
 
@@ -161,6 +164,8 @@ def __init__(self, comic, chapter_num, volume_num, chapter_url):
         self.wait_time = comic.wait_time
         self.max_retries = comic.max_retries
         self.comic_file_format = comic.file_format
+        # Set verify mode
+        self.verify_https = comic.verify_https
 
     def download_chapter(self):
         """Download and convert it into a cbz file."""
@@ -219,7 +224,7 @@ def manga_get_pages(self):
 
         while True:
             # Get javascript blocks
-            r = requests.get(base_url)
+            r = requests.get(base_url, verify=self.verify_https)
             soup = bsoup.BeautifulSoup(r.text, 'html.parser')
             scripts = [script for script in soup.find_all(
                 'script', attrs={'type': 'text/javascript'})]
@@ -253,7 +258,7 @@ def manga_get_pages(self):
     def comic_get_pages(self):
         """Obtain list of pages in a comic chapter."""
         url = self.chapter_url
-        r = requests.get(url)
+        r = requests.get(url, verify=self.verify_https)
         soup = bsoup.BeautifulSoup(r.text, 'html.parser')
         images = [image.get('src') for image in soup.find_all(
             'img', attrs={'class': "chapter_img"})]
@@ -273,12 +278,12 @@ def manga_download_page(self, page):
         wait_retry_time = deepcopy(self.wait_time)
 
         while True:
-            r = requests.get(page_url)
+            r = requests.get(page_url, verify=self.verify_https)
             soup = bsoup.BeautifulSoup(r.text, 'html.parser')
             img = soup.find_all('img', attrs={'id': 'image'})
             if img:
                 image = img[0].get('src')
-                download_image(image, filename)
+                download_image(image, filename, self.verify_https)
                 return True
             elif (max_retries > 0):
                 # Idea from manga_downloader (which in turn was from wget)
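The `elif (max_retries > 0)` branch continues past this hunk; judging by the wget reference in the comment, the retry presumably sleeps a randomized interval and backs off. A hedged sketch of that pattern, with illustrative constants (the actual retry body is not shown in this diff):

from random import uniform
from time import sleep

def backoff(max_retries, wait_retry_time):
    # Sleep a randomized interval around the current wait, then double
    # the wait for the next attempt (wget-style backoff); the caller
    # continues with the returned retry budget and wait.
    sleep(uniform(0.5 * wait_retry_time, 1.5 * wait_retry_time))
    return max_retries - 1, wait_retry_time * 2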
@@ -303,9 +308,9 @@ def comic_download_page(self, page):
         return True
 
 
-def download_image(url, filename):
+def download_image(url, filename, verify_https):
     """Download image (url) and save (filename)."""
-    response = requests.get(url, stream=True)
+    response = requests.get(url, stream=True, verify=verify_https)
     with open(filename, 'wb') as out_file:
         shutil.copyfileobj(response.raw, out_file)
     del response
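A hypothetical call site for the new three-argument signature (URL and filename are placeholders):

# Stream a page image to disk, validating the certificate only when
# the earlier probe succeeded.
download_image('https://example.com/page-001.jpg', 'page-001.jpg',
               verify_https=True)

Because the body is copied from `response.raw`, the bytes land on disk exactly as received; requests does not decode the raw stream.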
@@ -317,7 +322,7 @@ def zipdir(folder, filename):
     zipf = ZipFile(filename, 'w', ZIP_DEFLATED)
     for root, dirs, files in os.walk(folder):
         # note: ignore empty directories
-        for fn in files:
+        for fn in sorted(files):
             zipf.write(
                 os.path.join(root, fn),
                 os.path.relpath(os.path.join(root, fn), folder))
@@ -331,7 +336,7 @@ def pdfdir(folder, filename):
     for root, dirs, files in os.walk(folder):
         # Convert images to pdf
         f.write(img2pdf.convert(
-            [os.path.join(root, fn) for fn in sorted(files)]))
 
 
 def main():
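One note on both `sorted(files)` changes: `os.walk` yields filenames in arbitrary, filesystem-dependent order, so sorting makes the page order in the .cbz and .pdf outputs deterministic. Sorting is lexicographic, though, so it matches reading order only when page filenames are zero-padded:

>>> sorted(['page10.jpg', 'page2.jpg'])
['page10.jpg', 'page2.jpg']    # '1' < '2', so page 10 sorts first
>>> sorted(['page02.jpg', 'page10.jpg'])
['page02.jpg', 'page10.jpg']   # zero-padded names sort in page order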
@@ -353,7 +358,7 @@ def main():
         "-c", "--chapters", default=False,
         help="Specify chapters to download separated by : (10:20).")
     parser.add_argument(
-        "-ct", "--chapterthreads", default=2,
+        "-ct", "--chapterthreads", default=5,
         help="Number of parallel chapters downloads.")
     parser.add_argument(
         "-pt", "--pagethreads", default=10,
@@ -371,7 +376,21 @@ def main():
     args = parser.parse_args()
 
     for url in args.urls:
-        comic = Comic(url, args)
+        # If https, check the certificate before falling back to verify=False
+        urlscheme = urlparse(url)
+        verify_https = False
+        if urlscheme.scheme == 'https':
+            try:
+                requests.get(url)
+                verify_https = True
+            except requests.exceptions.SSLError:
+                verify_https = False
+                print('Could not validate https certificate for url: ' +
+                      '%s. Proceeding with insecure certificate.' % url)
+                requests.packages.urllib3.disable_warnings(
+                    category=InsecureRequestWarning)
+
+        comic = Comic(url, args, verify_https)
         print('Downloading comic: ' + comic.name)
 
         # Get chapters to download
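The per-URL block added above amounts to a probe: try the URL with verification on, and only fall back to `verify=False` (silencing the warning) when validation fails. A self-contained sketch of the same pattern, with an illustrative function name:

import requests
from urllib.parse import urlparse
from urllib3.exceptions import InsecureRequestWarning

def can_verify_https(url):
    """Return True if url is https and its certificate validates."""
    if urlparse(url).scheme != 'https':
        return False
    try:
        requests.get(url)  # verify=True is the requests default
        return True
    except requests.exceptions.SSLError:
        # Fall back to insecure requests and silence the per-request
        # warning urllib3 would otherwise emit.
        requests.packages.urllib3.disable_warnings(
            category=InsecureRequestWarning)
        return False

Note the probe costs one full GET per URL before any downloading starts; a `requests.head(url)` would exercise the TLS handshake more cheaply, if the server answers HEAD requests.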