77import os
88import re
99import concurrent .futures
10+ from urllib .parse import urlparse , urljoin
1011from zipfile import ZipFile , ZIP_DEFLATED
1112from random import shuffle , uniform
1213from time import sleep
1314from copy import deepcopy
14- from fpdf import FPDF
15- from PIL import Image
16- from PyPDF2 import PdfFileMerger
15+ # from fpdf import FPDF
16+ # from PIL import Image
17+ # from PyPDF2 import PdfFileMerger
1718
1819
1920class Comic :
21+ """Comic class. Contains chapters."""
22+
2023 def __init__ (self , comic_url , program_args ):
24+ """Init function. Creates chapters for the given comic."""
2125 self .url = comic_url
2226 self .name = comic_url .split ('/' )[- 1 ] \
2327 if comic_url .split ('/' )[- 1 ] else comic_url .split ('/' )[- 2 ]
@@ -36,6 +40,7 @@ def __init__(self, comic_url, program_args):
3640 self .all_chapters = self .get_chapters ()
3741
3842 def get_chapters (self ):
43+ """Get list of chapters."""
3944 if 'mangafox' in self .url :
4045 self .mode = ['manga' , 'mangafox' ]
4146 chapters = self .manga_extract_chapters ()
@@ -52,6 +57,7 @@ def get_chapters(self):
5257 return chapters
5358
5459 def set_download_chapters (self , potential_keys = None ):
60+ """Set chapters to download."""
5561 if potential_keys :
5662 keys = list (set (potential_keys ) & set (self .all_chapters .keys ()))
5763 else :
@@ -67,6 +73,7 @@ def set_download_chapters(self, potential_keys=None):
6773 print (sorted (keys ))
6874
6975 def download_comic (self ):
76+ """Begin download the chapters in the comic."""
7077 with concurrent .futures .ThreadPoolExecutor (
7178 max_workers = self .chapter_threads ) as executor :
7279 future_to_chapter = {
@@ -84,8 +91,12 @@ def download_comic(self):
8491 print ('Downloaded: Chapter-%g' % (chapter_num ))
8592
8693 def manga_extract_chapters (self ):
94+ """Extract chapters if the comic is a manga."""
8795 comic_name = self .name
8896 url = self .url
97+ urlscheme = urlparse (url )
98+
99+ # Get chapters
89100 r = requests .get (url )
90101 soup = bsoup .BeautifulSoup (r .text , 'html.parser' )
91102
@@ -97,7 +108,8 @@ def manga_extract_chapters(self):
97108 ('manga' in link .get ('href' ))]
98109
99110 for link in links :
100- chapter_link = '/' .join (link .split ('/' )[:- 1 ])
111+ chapter_link = urljoin (urlscheme .scheme + "://" + urlscheme .netloc ,
112+ '/' .join (link .split ('/' )[:- 1 ]))
101113 matched_groups = re .search ('v(\d*)/c([\d \.]*)' , chapter_link )
102114 if matched_groups :
103115 volume_num = int (matched_groups .group (1 ))
@@ -110,6 +122,7 @@ def manga_extract_chapters(self):
110122 return chapters
111123
112124 def comic_extract_chapters (self ):
125+ """Extract chapters if it is a comic."""
113126 url = self .url
114127 comic = url .split ('/' )[- 1 ]
115128 r = requests .get (url )
@@ -133,8 +146,11 @@ def comic_extract_chapters(self):
133146
134147
135148class Chapter :
149+ """Chapter class. Contains pages."""
150+
136151 def __init__ (self , comic , chapter_num , volume_num , chapter_url ):
137- # Extract necessay information from the comic object
152+ """Initialize constants required for download."""
153+ # Extract necessary information from the comic object
138154 self .comic_name = comic .name
139155 self .comic_download_location = comic .download_location
140156 self .comic_mode = comic .mode
@@ -149,7 +165,7 @@ def __init__(self, comic, chapter_num, volume_num, chapter_url):
149165 self .comic_file_format = comic .file_format
150166
151167 def download_chapter (self ):
152- ''' Download and convert it into a cbz file '''
168+ """ Download and convert it into a cbz file."""
153169 init_status , pages , download_func = self .initialize_chapter_download ()
154170
155171 if not init_status :
@@ -174,15 +190,15 @@ def download_chapter(self):
174190 chapter_name = os .path .join (
175191 self .comic_download_location , '%s-%g.cbz'
176192 % (self .comic_name , self .chapter_num ))
177-
193+
178194 if self .comic_file_format == 'pdf' :
179195 pdfdir (self .chapter_location , chapter_name )
180196 else :
181197 zipdir (self .chapter_location , chapter_name )
182198 shutil .rmtree (self .chapter_location )
183199
184200 def initialize_chapter_download (self ):
185- ''' Obtain pages and function based on the mode '''
201+ """ Obtain pages and function based on the mode."""
186202 if self .comic_mode [0 ] == 'manga' :
187203 init_status , pages = self .manga_get_pages ()
188204 func = self .manga_download_page
@@ -193,6 +209,7 @@ def initialize_chapter_download(self):
193209 return init_status , pages , func
194210
195211 def manga_get_pages (self ):
212+ """Obtain list of pages in a manga chapter."""
196213 # Get base url
197214 if (self .comic_mode [1 ] == 'mangafox' ):
198215 base_url = self .chapter_url + '/1.html'
@@ -220,35 +237,36 @@ def manga_get_pages(self):
220237 total_pages = int (matched_groups .group (1 ))
221238 break
222239 # Get page urls
223- page_urls = ["%s/%d.html" % (self .chapter_url , i + 1 )
240+ page_urls = ["%s/%d.html" % (self .chapter_url , i + 1 )
224241 for i in range (total_pages )]
225- page_num = [i + 1 for i in range (total_pages )]
242+ page_num = [i + 1 for i in range (total_pages )]
226243 pages = list (zip (page_urls , page_num ))
227244 shuffle (pages )
228245
229246 return True , pages
230247
231248 elif (max_retries > 0 ):
232249 # Idea from manga_downloader (which in turn was from wget)
233- sleep (uniform (0.5 * wait_retry_time , 1.5 * wait_retry_time ))
250+ sleep (uniform (0.5 * wait_retry_time , 1.5 * wait_retry_time ))
234251 max_retries -= 1
235252 else :
236253 return False , None
237254
def comic_get_pages(self):
    """Collect the page images of a comic chapter.

    Fetches ``self.chapter_url``, picks every ``<img>`` tag whose class is
    ``chapter_img`` and pairs its ``src`` attribute with a 1-based page
    number (document order).

    Returns:
        A ``(True, pages)`` tuple, where ``pages`` is a list of
        ``(image_src, page_number)`` tuples in shuffled order. The status
        flag is always ``True`` here.
    """
    response = requests.get(self.chapter_url)
    document = bsoup.BeautifulSoup(response.text, 'html.parser')
    image_tags = document.find_all('img', attrs={'class': "chapter_img"})

    # Number the pages before shuffling so each page keeps its real
    # position regardless of the order in which it is handed out.
    pages = [(tag.get('src'), number)
             for number, tag in enumerate(image_tags, start=1)]
    shuffle(pages)

    return True, pages
249267
250268 def manga_download_page (self , page ):
251- ''' Downloads individual pages in a manga '''
269+ """Download individual pages in a manga."""
252270 page_url , page_num = page
253271 filename = os .path .join (self .chapter_location ,
254272 '%0.3d.jpg' % (page_num ))
@@ -266,7 +284,7 @@ def manga_download_page(self, page):
266284 return True
267285 elif (max_retries > 0 ):
268286 # Idea from manga_downloader (which in turn was from wget)
269- sleep (uniform (0.5 * wait_retry_time , 1.5 * wait_retry_time ))
287+ sleep (uniform (0.5 * wait_retry_time , 1.5 * wait_retry_time ))
270288 max_retries -= 1
271289 else :
272290 print ("Failed download: Chapter-%g, page-%d"
@@ -278,7 +296,7 @@ def manga_download_page(self, page):
278296 return False
279297
280298 def comic_download_page (self , page ):
281- ''' Downloads individual pages in a manga '''
299+ """Download individual pages in a comic."""
282300 image , page_num = page
283301 filename = os .path .join (self .chapter_location ,
284302 '%0.3d.jpg' % (page_num ))
@@ -288,13 +306,15 @@ def comic_download_page(self, page):
288306
289307
def download_image(url, filename):
    """Download the image at ``url`` and save it to ``filename``.

    The body is streamed straight to disk instead of being loaded into
    memory first.

    Args:
        url: Address of the image to fetch.
        filename: Path of the file to (over)write with the image bytes.
    """
    response = requests.get(url, stream=True)
    try:
        # The raw stream is not decoded by default; without this a
        # gzip/deflate-encoded reply would be saved still compressed.
        response.raw.decode_content = True
        with open(filename, 'wb') as out_file:
            shutil.copyfileobj(response.raw, out_file)
    finally:
        # Release the connection even when opening or writing the file fails.
        response.close()
295314
296315
297316def zipdir (folder , filename ):
317+ """Zip folder."""
298318 assert os .path .isdir (folder )
299319 zipf = ZipFile (filename , 'w' , ZIP_DEFLATED )
300320 for root , dirs , files in os .walk (folder ):
@@ -305,42 +325,44 @@ def zipdir(folder, filename):
305325 os .path .relpath (os .path .join (root , fn ), folder ))
306326 zipf .close ()
307327
328+
def pdfdir(folder, filename):
    """Create a single PDF out of the images in ``folder``.

    Each file in the top level of ``folder`` is rendered to its own one-page
    PDF, written next to the image with the same base name and sized in
    points to the image's pixel dimensions. The per-page PDFs are then
    merged, in file-name order, into one document.

    Args:
        folder: Existing directory holding the page images.
        filename: Target path; its extension is replaced by ``.pdf`` to form
            the path of the merged document.

    Raises:
        ImportError: If ``fpdf``, ``PIL`` or ``PyPDF2`` is not installed.
    """
    # These packages are not imported at module level (those imports are
    # commented out), so load them only when PDF output is requested.
    from fpdf import FPDF
    from PIL import Image
    from PyPDF2 import PdfFileMerger

    assert os.path.isdir(folder)

    # Use the files directly inside ``folder`` only, sorted by name: pages
    # are saved with zero-padded numeric names, so this is the page order.
    files = sorted(next(os.walk(folder))[2])

    page_paths = []
    for fn in files:
        image_path = folder + os.sep + fn
        # Only the dimensions are needed; close the image right away.
        with Image.open(image_path) as im:
            width, height = im.size

        pdf = FPDF(unit="pt", format=[width, height])
        pdf.add_page()
        pdf.image(image_path, 0, 0)

        page_path = folder + os.sep + fn.rsplit('.', 1)[0] + '.pdf'
        pdf.output(page_path, 'F')
        page_paths.append(page_path)

    merger = PdfFileMerger()
    try:
        # Passing paths lets the merger own (and later close) the inputs.
        for page_path in page_paths:
            merger.append(page_path)

        with open(filename.rsplit('.', 1)[0] + '.pdf', 'wb') as merge_file:
            merger.write(merge_file)
    finally:
        merger.close()
330352# cover = Image.open(folder + os.sep + fn)
331353# width, height = cover.size
332354# pdf = FPDF(unit = "pt", format = [width, height])
333355# pdf.add_page()
334356# pdf.image(folder + os.sep + fn, 0, 0)
335357# pdf.output(folder + os.sep + fn.rsplit('.', 1)[0] + '.pdf', 'F')
336- #
358+ #
337359# merger = PdfFileMerger()
338360# for fn in files:
339361# merger.append(open(folder + os.sep + fn.rsplit('.', 1)[0] + '.pdf', 'rb'))
340362# merger.write(filename.rsplit('.', 1)[0] + '.pdf')
341363
342364def main ():
343- # parse input
365+ """Parse input and download comic(s)."""
344366 parser = argparse .ArgumentParser (
345367 description = (
346368 'Downloads all manga chapters from'
@@ -367,11 +389,11 @@ def main():
367389 "-wt" , "--waittime" , default = 10 ,
368390 help = "Wait time before retry if encountered with an error" )
369391 parser .add_argument (
370- "-rt" , "--retries" , default = 10 ,
392+ "-rt" , "--retries" , default = 30 ,
371393 help = "Number of retries before giving up" )
372394 parser .add_argument (
373395 "-f" , "--format" , default = 'cbz' ,
374- help = "File format of the downloaded file, supported .PDF and .CBZ " )
396+ help = "File format of the downloaded file, supported 'pdf' and 'cbz' " )
375397
376398 args = parser .parse_args ()
377399
@@ -386,7 +408,9 @@ def main():
386408 if len (start_stop ) == 1 :
387409 potential_keys = [float (start_stop [0 ])]
388410 elif len (start_stop ) == 2 :
389- potential_keys = [i * 0.5 for i in range (2 * int (start_stop [0 ]), 2 * int (start_stop [1 ])+ 1 )]
411+ potential_keys = [
412+ i * 0.5 for i in range (2 * int (start_stop [0 ]),
413+ 2 * int (start_stop [1 ]) + 1 )]
390414 else :
391415 raise SyntaxError (
392416 "Chapter inputs should be separated by ':'" )
@@ -401,7 +425,6 @@ def main():
401425 comic .download_comic ()
402426 print ('Downloaded comic:' + url .split ('/' )[- 1 ])
403427
404-
405-
428+
406429if __name__ == '__main__' :
407430 main ()
0 commit comments