initial commit

danielburger1337 · danielburger1337 · commit 93e1712441ad · 2021-03-18T10:38:39.000+01:00
diff --git a/.editorconfig b/.editorconfig
@@ -0,0 +1,12 @@
+# EditorConfig is awesome: https://EditorConfig.org
+
+# top-most EditorConfig file
+root = true
+
+[*]
+indent_style = space
+indent_size = 4
+end_of_line = lf
+charset = utf-8
+trim_trailing_whitespace = true
+insert_final_newline = true
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,4 @@
+/build
+/dist
+/__pycache__
+/pdfmerge.spec
diff --git a/README.md b/README.md
@@ -0,0 +1,42 @@
+# PdfMerge
+
+Simple CLI program that merges consecutive ".pdf"-files in a directory.
+
+This software is intended for a very specific use case:
+
+Our current scanner cannot easily output the front and back page of a scanned document into a single PDF. It can only do that if you scan your documents one at a time, which is obviously very inconvenient for an employee who has to scan hundreds of documents at a time.
+
+The workaround is that the scanner saves every front and back page into separate files. Each filename has the current timestamp (down to the millisecond) and an auto-incremented counter. This program simply merges these consecutive files in a given directory.
+
+## Example:
+
+If we have a directory with the following files:
+
+```
+1.pdf
+3.pdf
+5.pdf
+4.pdf
+```
+
+the program will do the following:
+
+```
+{uuid.uuid4()}.pdf # file containing 1.pdf and 3.pdf
+{uuid.uuid4()}.pdf # file containing 4.pdf and 5.pdf
+```
+
+---
+
+The algorithm for that is very sophisticated:
+
+It sorts the files in the selected directory by their name. That's it.
+
+
+# Compile
+
+This software is written in Python 3.9 and uses [PyInstaller](https://www.pyinstaller.org/) to create native binaries.
+
+```bash
+pyinstaller pdfmerge.py --onefile --icon=app.ico
+```
diff --git a/app.ico b/app.ico
diff --git a/pdfmerge.py b/pdfmerge.py
@@ -0,0 +1,73 @@
+from PyPDF2 import PdfFileMerger
+from tkinter import filedialog
+import tkinter as tk
+import uuid
+import os
+
+# Create the Tk root window and withdraw (hide) it immediately, so that no
+# empty main window is displayed next to the directory dialog.
+root = tk.Tk()
+root.withdraw()
+
+
+def pdf_concat():
+    """Merge consecutive pairs of ".pdf" files from a user-selected directory.
+
+    Opens a directory chooser, collects every entry whose name ends with
+    ".pdf", sorts the paths and merges them two at a time (a trailing odd
+    file is written on its own). Each result is written under a random
+    UUID name into a "merged" sub-folder of the selected directory; any
+    ".pdf" files already present in that folder are deleted first.
+    Progress is printed after each written file, and the function then
+    waits for Enter so the console window stays open.
+
+    Returns early, without creating or deleting anything, when the dialog
+    is cancelled or when no ".pdf" files are found.
+    """
+    file_path = filedialog.askdirectory()
+
+    # askdirectory() yields an empty value when the dialog is cancelled;
+    # os.listdir('') would raise, so stop here
+    if not file_path:
+        return
+
+    destination = os.path.join(file_path, 'merged')
+
+    # load all ".pdf" files from the selected directory
+    # and put them into a sorted list
+    pdfs = sorted(
+        os.path.join(file_path, name)
+        for name in os.listdir(file_path)
+        if name.endswith(".pdf")
+    )
+    pdfLen = len(pdfs)
+
+    print(f'Found {pdfLen} ".pdf files" in {file_path}')
+
+    # return if the directory contained no pdfs
+    if pdfLen < 1:
+        return
+
+    # create the destination folder
+    if not os.path.isdir(destination):
+        os.mkdir(destination)
+    else:
+        # else clear the folder
+        for f in os.listdir(destination):
+            if f.endswith(".pdf"):
+                os.remove(os.path.join(destination, f))
+
+    # how many pdfs will be generated: one per pair, rounded up so that a
+    # trailing single file counts as its own output
+    length = (pdfLen + 1) // 2
+
+    # chunk the list into chunks of 2 and merge them
+    for x, chunk in enumerate(chunks(pdfs, 2), start=1):
+        newFilename = os.path.join(destination, f'{uuid.uuid4()}.pdf')
+
+        merger = PdfFileMerger()
+        try:
+            for pdf in chunk:
+                merger.append(pdf)
+
+            merger.write(newFilename)
+        finally:
+            # always close the merger, even if appending or writing failed
+            merger.close()
+
+        # give the user progress feedback
+        progress = int((x / length) * 100)
+        print(f'{newFilename} ({x} von {length}) {progress}%')
+
+    # prevent automatic closing of the process
+    input()
+
+### php: array_chunk
+def chunks(lst, n):
+    """Yield successive slices of ``lst`` that hold at most ``n`` items each."""
+    offsets = range(0, len(lst), n)
+    for offset in offsets:
+        stop = offset + n
+        yield lst[offset:stop]
+
+
+# Run the merge only when this file is executed as a script, not on import.
+if __name__ == '__main__':
+    pdf_concat()
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1,6 @@
+altgraph==0.17
+macholib==1.14
+pyinstaller==4.2
+pyinstaller-hooks-contrib==2021.1
+PyPDF2==1.26.0
+tk==0.1.0

-Original file line number
+Diff line change
@@ @@ -0,0 +1,4 @@ @@
 +/build
 +/dist
 +/__pycache__
 +/pdfmerge.spec