|
| 1 | +"""Base Comic class.""" |
| 2 | +import os |
| 3 | +from collections import OrderedDict |
| 4 | +import concurrent.futures |
| 5 | +import shutil |
| 6 | +import requests |
| 7 | +from zipfile import ZipFile, ZIP_DEFLATED |
| 8 | +import img2pdf |
| 9 | + |
| 10 | + |
class BaseComic:
    """Base Comic class. Contains chapters.

    Holds the download settings taken from the program arguments and the
    mapping of chapters returned by :meth:`extract_chapters`, which
    subclasses are expected to override.
    """

    def __init__(self, comic_url, program_args, verify_https):
        """Init function. Creates chapters for the given comic.

        Args:
            comic_url: URL of the comic; its last non-empty path segment
                becomes the comic name.
            program_args: Object exposing ``location``, ``chapterthreads``,
                ``pagethreads``, ``waittime``, ``retries`` and ``format``.
            verify_https: Stored as-is and handed to the chapters.
        """
        self.url = comic_url
        # A trailing slash leaves an empty last segment; fall back to the
        # segment before it.
        url_parts = comic_url.split('/')
        self.name = url_parts[-1] if url_parts[-1] else url_parts[-2]
        # Set download location
        self.download_location = os.path.abspath(
            os.path.join(program_args.location, self.name))
        # exist_ok avoids the check-then-create race of a separate
        # os.path.exists() test.
        os.makedirs(self.download_location, exist_ok=True)
        # Set threads and retry values
        self.chapter_threads = program_args.chapterthreads
        self.page_threads = program_args.pagethreads
        self.wait_time = program_args.waittime
        self.max_retries = program_args.retries
        self.file_format = program_args.format
        # Set verify mode
        self.verify_https = verify_https
        # Start with nothing selected so download_comic() is safe to call
        # before set_download_chapters().
        self.chapters_to_download = OrderedDict()
        # Get all chapters and mode of download
        self.all_chapters = self.extract_chapters()

    def set_download_chapters(self, potential_keys=None):
        """Set chapters to download.

        Args:
            potential_keys: Optional iterable of chapter keys. Only keys
                that exist in ``all_chapters`` are kept. When empty or
                None, every available chapter is selected.
        """
        # The base extract_chapters() returns None; treat it as "no chapters".
        available = self.all_chapters or {}
        if potential_keys:
            keys = set(potential_keys) & set(available)
        else:
            keys = set(available)

        # Keep the selection in ascending key order
        ordered_keys = sorted(keys)
        self.chapters_to_download = OrderedDict(
            (key, available[key]) for key in ordered_keys)
        # Print downloading chapters
        print("Downloading the below chapters:")
        print(ordered_keys)

    def download_comic(self):
        """Begin download the chapters in the comic.

        Every selected chapter's ``download_chapter`` runs in a thread
        pool of ``chapter_threads`` workers. A failing chapter is reported
        on stdout and does not stop the remaining chapters.
        """
        with concurrent.futures.ThreadPoolExecutor(
                max_workers=self.chapter_threads) as executor:
            future_to_chapter = {
                executor.submit(chapter.download_chapter): chapter_num
                for chapter_num, chapter in self.chapters_to_download.items()}

            for future in concurrent.futures.as_completed(future_to_chapter):
                chapter_num = future_to_chapter[future]
                try:
                    future.result()
                except Exception as exc:
                    # Boundary of the worker thread: report and carry on
                    print('Chapter-%g generated an exception: %s'
                          % (chapter_num, exc))
                else:
                    print('Downloaded: Chapter-%g' % (chapter_num))

    def extract_chapters(self):
        """Extract chapters function (backbone).

        The base implementation does nothing and returns None; the result
        is stored in ``all_chapters`` by ``__init__``.
        """
        pass
| 72 | + |
| 73 | + |
class BaseChapter:
    """Base Chapter class. Contains pages.

    Subclasses are expected to override :meth:`get_pages` and
    :meth:`download_page`.
    """

    def __init__(self, comic, chapter_num, volume_num, chapter_url):
        """Initialize constants required for download.

        Args:
            comic: Object exposing ``name``, ``download_location``,
                ``page_threads``, ``wait_time``, ``max_retries``,
                ``file_format`` and ``verify_https``.
            chapter_num: Chapter number, used in folder and archive names.
            volume_num: Volume number, used in the archive name.
            chapter_url: URL of the chapter.
        """
        # Extract necessary information from the comic object
        self.comic_name = comic.name
        self.comic_download_location = comic.download_location
        # Create chapter specific variables
        self.chapter_num = chapter_num
        self.volume_num = volume_num
        self.chapter_url = chapter_url
        # Threads and retry time
        self.page_threads = comic.page_threads
        self.wait_time = comic.wait_time
        self.max_retries = comic.max_retries
        self.comic_file_format = comic.file_format
        # Set verify mode
        self.verify_https = comic.verify_https
        # Get download chapter location
        self.chapter_location = os.path.join(
            self.comic_download_location, 'chapter-' + str(self.chapter_num))

    def download_chapter(self):
        """Download the pages and convert them into a pdf or cbz file.

        Pages are fetched in a thread pool of ``page_threads`` workers. A
        page whose download raises is reported on stdout; the remaining
        pages are still downloaded and archived. The temporary chapter
        folder is removed afterwards.

        Raises:
            RuntimeError: If ``get_pages`` reports failure.
        """
        init_status, pages = self.get_pages()
        if not init_status:
            raise RuntimeError('Unable to obtain pages in the chapter')

        # Create chapter location (if it doesn't exist)
        os.makedirs(self.chapter_location, exist_ok=True)

        # Download individual pages in parallel
        with concurrent.futures.ThreadPoolExecutor(
                max_workers=self.page_threads) as executor:
            page_futures = [
                executor.submit(self.download_page, page) for page in pages]
            # Collect every result: an unconsumed future would swallow the
            # worker's exception without a trace.
            for future in concurrent.futures.as_completed(page_futures):
                try:
                    future.result()
                except Exception as exc:
                    print('Chapter-%s: a page failed to download: %s'
                          % (self.chapter_num, exc))

        # Convert the folder to a comic book zip filename
        chapter_name = os.path.join(
            self.comic_download_location, '%s-%g (v%d)'
            % (self.comic_name, self.chapter_num, self.volume_num))

        if self.comic_file_format == 'pdf':
            pdfdir(self.chapter_location, chapter_name + ".pdf")
        elif self.comic_file_format == 'cbz':
            zipdir(self.chapter_location, chapter_name + ".cbz")
        shutil.rmtree(self.chapter_location)

    def get_pages(self):
        """Get pages function (backbone).

        Returns:
            A ``(status, pages)`` pair; the base implementation reports
            failure with ``(False, 0)``.
        """
        return False, 0

    def download_page(self, page=None):
        """Download page (backbone).

        Args:
            page: One item of the ``pages`` value returned by
                ``get_pages``; ``download_chapter`` passes it positionally.
        """
        pass

    def download_image(self, url, filename):
        """Download image (url) and save (filename).

        Raises:
            requests.HTTPError: If the server answers with an error status,
                so an error page is never saved as an image.
        """
        response = requests.get(url, stream=True, verify=self.verify_https)
        try:
            response.raise_for_status()
            # The raw stream bypasses requests' content decoding; ask
            # urllib3 to undo gzip/deflate transfer encodings itself.
            response.raw.decode_content = True
            with open(filename, 'wb') as out_file:
                shutil.copyfileobj(response.raw, out_file)
        finally:
            # Release the connection even when the copy fails
            response.close()
| 139 | + |
| 140 | + |
def zipdir(folder, filename):
    """Zip folder.

    Every file below ``folder`` is stored under its path relative to
    ``folder``. Empty directories are not stored.

    Args:
        folder: Existing directory whose files are archived.
        filename: Path of the zip archive to create (overwritten).

    Raises:
        AssertionError: If ``folder`` is not a directory.
    """
    # Raised explicitly (not via ``assert``) so the check survives ``-O``
    # while callers still see the same exception type.
    if not os.path.isdir(folder):
        raise AssertionError('%s is not a directory' % folder)
    # The context manager closes the archive even if a write fails
    with ZipFile(filename, 'w', ZIP_DEFLATED) as zipf:
        for root, dirs, files in os.walk(folder):
            # Sorting in place makes os.walk visit sub-folders in a
            # deterministic order.
            dirs.sort()
            # note: ignore empty directories
            for fn in sorted(files):
                path = os.path.join(root, fn)
                zipf.write(path, os.path.relpath(path, folder))
| 152 | + |
| 153 | + |
def pdfdir(folder, filename):
    """Create PDF of images in the folder.

    All files below ``folder`` are collected (sorted by name within each
    directory) and converted into a single PDF document.

    Args:
        folder: Existing directory containing the images.
        filename: Path of the PDF file to create (overwritten).

    Raises:
        AssertionError: If ``folder`` is not a directory.
    """
    # Raised explicitly (not via ``assert``) so the check survives ``-O``
    # while callers still see the same exception type.
    if not os.path.isdir(folder):
        raise AssertionError('%s is not a directory' % folder)

    images = []
    for root, dirs, files in os.walk(folder):
        # Sorting in place makes os.walk visit sub-folders in a
        # deterministic order.
        dirs.sort()
        images.extend(os.path.join(root, fn) for fn in sorted(files))

    # Convert once: writing one converted document per walked directory
    # would concatenate several PDFs into a single, invalid file.
    # Converting before opening the output also avoids leaving a truncated
    # file behind when the conversion fails.
    pdf_bytes = img2pdf.convert(images)
    with open(filename, "wb") as f:
        f.write(pdf_bytes)
0 commit comments