1111from random import shuffle , uniform
1212from numpy import arange
1313from time import sleep
14+ from copy import deepcopy
1415
1516
1617class Comic :
def __init__(self, comic_url, program_args):
    """Set up a comic from its URL and the parsed command-line options.

    Args:
        comic_url: Address of the comic's main page. The last non-empty
            '/'-separated segment is used as the comic name.
        program_args: Object exposing ``location``, ``chapterthreads``,
            ``pagethreads``, ``waittime`` and ``retries`` attributes
            (the argparse namespace produced by the command-line parser).

    Raises:
        ValueError: If a thread, retry or wait option is not numeric, or
            if ``get_chapters`` rejects the URL as an unsupported site.
    """
    self.url = comic_url
    # A trailing slash leaves an empty final segment, so fall back to the
    # segment before it.
    segments = comic_url.split('/')
    self.name = segments[-1] if segments[-1] else segments[-2]
    # Set download location
    self.download_location = os.path.abspath(
        os.path.join(program_args.location, self.name))
    # exist_ok removes the gap between "does it exist?" and "create it".
    os.makedirs(self.download_location, exist_ok=True)
    # Set threads and retry values.
    # The command-line options are declared without type=, so values typed
    # by the user arrive as strings; normalise them once here so the
    # thread pools and retry logic always receive numbers.
    self.chapter_threads = int(program_args.chapterthreads)
    self.page_threads = int(program_args.pagethreads)
    self.wait_time = float(program_args.waittime)
    self.max_retries = int(program_args.retries)
    # Get all chapters and mode of download
    self.all_chapters = self.get_chapters()
2834
2935 def get_chapters (self ):
3036 if 'mangafox' in self .url :
3137 self .mode = ['manga' , 'mangafox' ]
32- chapters = self .manga_extract_chapters (self . url )
38+ chapters = self .manga_extract_chapters ()
3339 elif 'mangahere' in self .url :
3440 self .mode = ['manga' , 'mangahere' ]
35- chapters = self .manga_extract_chapters (self . url )
41+ chapters = self .manga_extract_chapters ()
3642 elif 'readcomics' in self .url :
3743 self .mode = ['comic' ]
38- chapters = self .comic_extract_chapters (self . url )
44+ chapters = self .comic_extract_chapters ()
3945 else :
4046 raise ValueError ('The scraper currently only supports mangafox, ' ,
4147 'mangahere and readcomics.tv ' ,
@@ -55,10 +61,11 @@ def set_download_chapters(self, potential_keys=None):
5561 sorted (unsorted_chapters .items (), key = lambda t : t [0 ]))
5662 # Print downloading chapters
5763 print ("Downloading the below chapters:" )
58- print (keys )
64+ print (sorted ( keys ) )
5965
6066 def download_comic (self ):
61- with concurrent .futures .ThreadPoolExecutor (max_workers = 5 ) as executor :
67+ with concurrent .futures .ThreadPoolExecutor (
68+ max_workers = self .chapter_threads ) as executor :
6269 future_to_chapter = {
6370 executor .submit (chapter .download_chapter ): chapter_num
6471 for chapter_num , chapter in self .chapters_to_download .items ()}
@@ -73,8 +80,9 @@ def download_comic(self):
7380 else :
7481 print ('Downloaded: Chapter-%g' % (chapter_num ))
7582
76- def manga_extract_chapters (self , url ):
83+ def manga_extract_chapters (self ):
7784 comic_name = self .name
85+ url = self .url
7886 r = requests .get (url )
7987 soup = bsoup .BeautifulSoup (r .text , 'html.parser' )
8088
@@ -98,7 +106,8 @@ def manga_extract_chapters(self, url):
98106 self , chapter_num , volume_num , chapter_link )
99107 return chapters
100108
101- def comic_extract_chapters (self , url ):
109+ def comic_extract_chapters (self ):
110+ url = self .url
102111 comic = url .split ('/' )[- 1 ]
103112 r = requests .get (url )
104113 soup = bsoup .BeautifulSoup (r .text , 'html.parser' )
@@ -130,6 +139,10 @@ def __init__(self, comic, chapter_num, volume_num, chapter_url):
130139 self .chapter_num = chapter_num
131140 self .volume_num = volume_num
132141 self .chapter_url = chapter_url
142+ # Threads and retry time
143+ self .page_threads = comic .page_threads
144+ self .wait_time = comic .wait_time
145+ self .max_retries = comic .max_retries
133146
134147 def download_chapter (self ):
135148 ''' Download and convert it into a cbz file '''
@@ -144,7 +157,8 @@ def download_chapter(self):
144157 os .makedirs (self .chapter_location )
145158
146159 # Download individual pages in parallel
147- with concurrent .futures .ThreadPoolExecutor (max_workers = 10 ) as executor :
160+ with concurrent .futures .ThreadPoolExecutor (
161+ max_workers = self .page_threads ) as executor :
148162 executor .map (download_func , pages )
149163
150164 # Convert the folder to a comic book zip filename
@@ -178,8 +192,8 @@ def manga_get_pages(self):
178192 elif (self .comic_mode [1 ] == 'mangahere' ):
179193 base_url = self .chapter_url
180194
181- max_retries = 5
182- wait_retry_time = 5
195+ max_retries = deepcopy ( self . max_retries )
196+ wait_retry_time = deepcopy ( self . wait_time )
183197
184198 while True :
185199 # Get javascript blocks
@@ -232,8 +246,8 @@ def manga_download_page(self, page):
232246 filename = os .path .join (self .chapter_location ,
233247 '%0.3d.jpg' % (page_num ))
234248
235- max_retries = 10
236- wait_retry_time = 10
249+ max_retries = deepcopy ( self . max_retries )
250+ wait_retry_time = deepcopy ( self . wait_time )
237251
238252 while True :
239253 r = requests .get (page_url )
@@ -303,11 +317,23 @@ def main():
303317 parser .add_argument (
304318 "-c" , "--chapters" , default = False ,
305319 help = "Specify chapters to download separated by : (10:20)." )
320+ parser .add_argument (
321+ "-ct" , "--chapterthreads" , default = 5 ,
322+ help = "Number of parallel chapters downloads." )
323+ parser .add_argument (
324+ "-pt" , "--pagethreads" , default = 10 ,
325+ help = "Number of parallel chapter pages downloads (per chapter)." )
326+ parser .add_argument (
327+ "-wt" , "--waittime" , default = 10 ,
328+ help = "Wait time before retry if encountered with an error" )
329+ parser .add_argument (
330+ "-rt" , "--retries" , default = 10 ,
331+ help = "Number of retries before giving up" )
306332
307333 args = parser .parse_args ()
308334
309335 for url in args .urls :
310- comic = Comic (url , args . location )
336+ comic = Comic (url , args )
311337 print ('Downloading comic: ' + comic .name )
312338
313339 # Get chapters to download
0 commit comments