#!/usr/bin/env python3
"""Merge the ``Book*.pdf`` files of the current directory into a single PDF.

Every input file becomes one outline (bookmark) entry in the merged document,
titled after its file name.  The result is written to the current directory as
``<mfname>.<timestamp>.pdf``.

History: first written as "merge pdf" (2023-04-11); reworked as "merge book"
(2024-05-20) so that the inputs are merged in natural sort order.
"""

import os
import datetime

# pip3 install pypdf natsort
from pypdf import PdfWriter, PdfReader, PageRange
from natsort import natsorted


# Base name of the merged output file; a timestamp and ".pdf" are appended.
mfname = "book3_elements_of_mathematics"


def _find_book_pdfs(directory):
    """Return the naturally sorted names of the ``Book*.pdf`` files in *directory*.

    Only regular files directly inside *directory* are considered;
    sub-directories are not descended into.  The extension test is
    case-insensitive, the ``Book`` prefix test is case-sensitive.
    """
    with os.scandir(directory) as entries:
        names = [
            entry.name
            for entry in entries
            if entry.is_file()
            and entry.name.startswith("Book")
            and os.path.splitext(entry.name)[1].lower() == ".pdf"
        ]
    # Natural sort so that e.g. "Book3_2" is merged before "Book3_10".
    return natsorted(names)


def _bookmark_title(filename):
    """Derive the outline title for *filename*.

    The extension and every ``Book3_`` marker are removed, at most the last
    two ``__``-separated suffixes are cut off, and the remaining underscores
    are turned into spaces.
    """
    stem, _ext = os.path.splitext(filename)
    stem = stem.replace("Book3_", "")
    stem = stem.rsplit("__", 2)[0]
    return stem.replace("_", " ")


def main():
    """Merge all matching PDFs of the working directory into one output file.

    Does nothing (apart from printing a notice) when no input file matches,
    instead of writing a page-less PDF.
    """
    pwd = os.getcwd()
    pdfs = _find_book_pdfs(pwd)
    if not pdfs:
        print("no Book*.pdf files found in {}; nothing to merge".format(pwd))
        return

    pdfwrtr = PdfWriter()
    try:
        for fitem in pdfs:
            # PdfReader's second positional parameter is ``strict``, not a
            # file mode: passing 'rb' there switched strict parsing on by
            # accident, so it is left out and the default is used.
            pdfrd = PdfReader(os.path.join(pwd, fitem))
            pdfwrtr.append(pdfrd, _bookmark_title(fitem))

            # ``metadata`` is None for a PDF without a document-information
            # dictionary; add_metadata() cannot take None.
            meta = pdfrd.metadata
            if meta is not None:
                # Keys set by later files overwrite those of earlier ones.
                pdfwrtr.add_metadata(meta)

        now_str = datetime.datetime.now().strftime("%Y.%m.%d_%H_%M_%S")
        pdfwrtr.write("{mfname}.{nw}.pdf".format(mfname=mfname, nw=now_str))
    finally:
        # Release the writer even when reading, appending or writing failed.
        pdfwrtr.close()


if __name__ == '__main__':
    main()