Skip to content
This repository was archived by the owner on Apr 2, 2024. It is now read-only.

Commit 7450630

Browse files
authored
Merge pull request #3 from dave-shivansh/master
pdf-export (incomplete)
2 parents 1c7cf14 + 8b6137a commit 7450630

File tree

1 file changed

+49
-6
lines changed

1 file changed

+49
-6
lines changed

comic_scraper/comic_scraper.py

Lines changed: 49 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,11 @@
99
import concurrent.futures
1010
from zipfile import ZipFile, ZIP_DEFLATED
1111
from random import shuffle, uniform
12-
from numpy import arange
1312
from time import sleep
1413
from copy import deepcopy
14+
from fpdf import FPDF
15+
from PIL import Image
16+
from PyPDF2 import PdfFileMerger
1517

1618

1719
class Comic:
@@ -29,6 +31,7 @@ def __init__(self, comic_url, program_args):
2931
self.page_threads = program_args.pagethreads
3032
self.wait_time = program_args.waittime
3133
self.max_retries = program_args.retries
34+
self.file_format = program_args.format
3235
# Get all chapters and mode of download
3336
self.all_chapters = self.get_chapters()
3437

@@ -143,6 +146,7 @@ def __init__(self, comic, chapter_num, volume_num, chapter_url):
143146
self.page_threads = comic.page_threads
144147
self.wait_time = comic.wait_time
145148
self.max_retries = comic.max_retries
149+
self.comic_file_format = comic.file_format
146150

147151
def download_chapter(self):
148152
''' Download and convert it into a cbz file '''
@@ -170,8 +174,11 @@ def download_chapter(self):
170174
chapter_name = os.path.join(
171175
self.comic_download_location, '%s-%g.cbz'
172176
% (self.comic_name, self.chapter_num))
173-
174-
zipdir(self.chapter_location, chapter_name)
177+
178+
if self.comic_file_format == 'pdf':
179+
pdfdir(self.chapter_location, chapter_name)
180+
else:
181+
zipdir(self.chapter_location, chapter_name)
175182
shutil.rmtree(self.chapter_location)
176183

177184
def initialize_chapter_download(self):
@@ -298,6 +305,39 @@ def zipdir(folder, filename):
298305
os.path.relpath(os.path.join(root, fn), folder))
299306
zipf.close()
300307

308+
def pdfdir(folder, filename):
    ''' Render every image in folder as a pdf page and merge them into one file.

    Each regular file directly inside ``folder`` is opened as an image,
    written out as a single-page pdf whose page size (in points) equals the
    image size in pixels, and the pages are then concatenated in natural
    file-name order (so "2.jpg" comes before "10.jpg").

    Args:
        folder: Directory holding the downloaded page images.  The
            intermediate one-page pdfs are written here as well, next to
            their source image ("<image name>.pdf").
        filename: Path of the target archive; its extension (the caller
            passes a ".cbz" name) is replaced by ".pdf" for the merged file.

    Raises:
        AssertionError: If ``folder`` is not an existing directory.
    '''
    # Imported locally so this block does not depend on the module header.
    import re

    assert os.path.isdir(folder)

    def natural_key(name):
        # re.split with a capturing group alternates text / digit runs, so
        # odd positions are always the numeric tokens and can be compared
        # as integers without ever mixing int and str at the same index.
        tokens = re.split(r'(\d+)', name)
        return [int(tok) if idx % 2 else tok.lower()
                for idx, tok in enumerate(tokens)]

    # Only the files directly inside `folder` are pages; list them once,
    # before any intermediate pdf is created, and fix the page order.
    files = sorted(
        (fn for fn in os.listdir(folder)
         if os.path.isfile(os.path.join(folder, fn))),
        key=natural_key)

    page_paths = []
    for fn in files:
        image_path = os.path.join(folder, fn)
        # Close the image right after reading its size; fpdf re-opens the
        # file by path on its own.
        with Image.open(image_path) as im:
            width, height = im.size

        pdf = FPDF(unit="pt", format=[width, height])
        pdf.add_page()
        pdf.image(image_path, 0, 0)

        # Keep the full image name in the pdf name so that "001.jpg" and
        # "001.png" cannot overwrite each other's page.
        page_path = image_path + '.pdf'
        pdf.output(page_path, 'F')
        page_paths.append(page_path)

    merger = PdfFileMerger()
    handles = []
    try:
        for page_path in page_paths:
            handle = open(page_path, 'rb')
            handles.append(handle)
            merger.append(handle)

        with open(os.path.splitext(filename)[0] + '.pdf', 'wb') as merge_file:
            merger.write(merge_file)
    finally:
        # Release every handle so the caller can delete `folder` afterwards
        # (open files block directory removal on Windows).
        merger.close()
        for handle in handles:
            handle.close()
301341

302342
def main():
303343
# parse input
@@ -329,6 +369,9 @@ def main():
329369
parser.add_argument(
330370
"-rt", "--retries", default=10,
331371
help="Number of retries before giving up")
372+
parser.add_argument(
373+
"-f", "--format", default='cbz',
374+
help="File format of the downloaded file, supported .PDF and .CBZ")
332375

333376
args = parser.parse_args()
334377

@@ -343,8 +386,7 @@ def main():
343386
if len(start_stop) == 1:
344387
potential_keys = [float(start_stop[0])]
345388
elif len(start_stop) == 2:
346-
potential_keys = list(arange(
347-
float(start_stop[0]), float(start_stop[1])+0.5, 0.5))
389+
potential_keys = [i*0.5 for i in range(2*int(start_stop[0]), 2*int(start_stop[1])+1)]
348390
else:
349391
raise SyntaxError(
350392
"Chapter inputs should be separated by ':'")
@@ -359,6 +401,7 @@ def main():
359401
comic.download_comic()
360402
print('Downloaded comic:' + url.split('/')[-1])
361403

362-
404+
405+
363406
if __name__ == '__main__':
364407
main()

0 commit comments

Comments
 (0)