Skip to content

Commit b7b6bf5

Browse files
Merge pull request #313 from jaygangurde/patch-1
Create "Python PDF Sorter"
2 parents af82f36 + 6f7843a commit b7b6bf5

File tree

1 file changed

+123
-0
lines changed

1 file changed

+123
-0
lines changed

"Python PDF Sorter"

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
import os
2+
import re
3+
import PyPDF2
4+
import tkinter as tk
5+
from tkinter import filedialog, scrolledtext
6+
7+
class PDFSorterGUI:
    """Tkinter window that moves PDFs into folders based on keywords in their text.

    The user supplies a folder, a comma-separated keyword list, a minimum file
    size in bytes and a page-count range. Every PDF under the folder that
    passes the size and page-count filters is scanned, and the first keyword
    found in its text decides the directory the file is moved to.
    """

    def __init__(self, root):
        """Configure the main window and build the widgets.

        Args:
            root: The Tk root (or toplevel) window that hosts the UI.
        """
        self.root = root
        self.root.title("PDF Sorter")
        self.root.geometry("600x400")

        # Filter settings; overwritten from the entry fields on every sort run.
        self.keyword_to_directory_map = {}
        self.min_size_bytes = 0
        self.start_page = 1
        self.end_page = 1

        self.create_ui()

    def create_ui(self):
        """Create and pack the input fields, the sort button and the log area."""
        folder_label = tk.Label(self.root, text="Select Folder:")
        folder_label.pack()
        self.folder_entry = tk.Entry(self.root)
        self.folder_entry.pack()

        keywords_label = tk.Label(self.root, text="Keywords (comma-separated):")
        keywords_label.pack()
        self.keywords_entry = tk.Entry(self.root)
        self.keywords_entry.pack()

        size_label = tk.Label(self.root, text="Minimum Size (bytes):")
        size_label.pack()
        self.size_entry = tk.Entry(self.root)
        self.size_entry.pack()

        page_range_label = tk.Label(self.root, text="Page Range (e.g., 1-5):")
        page_range_label.pack()
        self.page_range_entry = tk.Entry(self.root)
        self.page_range_entry.pack()

        sort_button = tk.Button(self.root, text="Sort PDFs", command=self.sort_pdfs)
        sort_button.pack()

        self.log_text = scrolledtext.ScrolledText(self.root, wrap=tk.WORD, width=50, height=10)
        self.log_text.pack()

    def _log(self, message):
        """Append *message* to the log area."""
        self.log_text.insert(tk.END, message)

    def sort_pdfs(self):
        """Read and validate the form fields, then sort the chosen folder.

        Invalid input is reported in the log area and aborts the run without
        touching any file.
        """
        folder_path = self.folder_entry.get()
        keywords = self.keywords_entry.get()
        size_str = self.size_entry.get()
        page_range_str = self.page_range_entry.get()

        # Drop blank entries: an empty keyword would match every document and
        # cause every PDF to be moved.
        self.keyword_to_directory_map.clear()
        for keyword in (kw.strip() for kw in keywords.split(',')):
            if keyword:
                self.keyword_to_directory_map[keyword] = "/path/to/sorted/directory"  # Replace with actual directory paths

        try:
            self.min_size_bytes = int(size_str)
        except ValueError:
            self._log("Invalid minimum size value.\n")
            return

        page_range = page_range_str.split('-')
        if len(page_range) != 2:
            self._log("Invalid page range format.\n")
            return
        try:
            start_page = int(page_range[0])
            end_page = int(page_range[1])
        except ValueError:
            self._log("Invalid page range values.\n")
            return
        if start_page > end_page:
            # A reversed range can never match any page count; reject it
            # instead of silently sorting nothing.
            self._log("Invalid page range values.\n")
            return
        self.start_page = start_page
        self.end_page = end_page

        self.sort_pdfs_by_criteria(folder_path)

    def _read_text_if_in_page_range(self, pdf_file_path):
        """Return the PDF's text, or None if its page count is out of range.

        The file handle is closed before returning so the caller is free to
        move the file afterwards.
        """
        # PyPDF2 3.0 removed PdfFileReader/getNumPages/getPage/extractText;
        # prefer the current names and fall back for older releases.
        reader_cls = getattr(PyPDF2, "PdfReader", None) or PyPDF2.PdfFileReader
        with open(pdf_file_path, "rb") as pdf_file:
            pages = reader_cls(pdf_file).pages
            if not (self.start_page <= len(pages) <= self.end_page):
                return None
            chunks = []
            for page in pages:
                extract = getattr(page, "extract_text", None) or page.extractText
                chunks.append(extract() or "")
        return "".join(chunks)

    def sort_pdfs_by_criteria(self, folder_path):
        """Walk *folder_path* and move each matching PDF to its keyword's directory.

        A PDF is considered when it is at least ``min_size_bytes`` large and
        its page count lies between ``start_page`` and ``end_page`` inclusive.
        The first keyword (case-insensitive, literal match) found in the
        extracted text selects the destination. Failures on a single file are
        logged and do not stop the run.

        Args:
            folder_path: Directory that is searched recursively for PDFs.
        """
        import shutil  # local import: only needed for the move below

        for root, _, files in os.walk(folder_path):
            for file in files:
                if not file.lower().endswith(".pdf"):
                    continue
                pdf_file_path = os.path.join(root, file)

                try:
                    # Cheap size filter first, before opening and parsing the PDF.
                    if os.path.getsize(pdf_file_path) < self.min_size_bytes:
                        continue

                    text = self._read_text_if_in_page_range(pdf_file_path)
                    if text is None:
                        continue

                    for keyword, destination_directory in self.keyword_to_directory_map.items():
                        # Keywords are plain text, not patterns: escape them so
                        # characters such as "+" or "(" are matched literally.
                        if re.search(re.escape(keyword), text, re.IGNORECASE):
                            os.makedirs(destination_directory, exist_ok=True)
                            destination_file = os.path.join(destination_directory, file)
                            # The source handle is already closed here (required
                            # on Windows); shutil.move also works across drives.
                            shutil.move(pdf_file_path, destination_file)
                            self._log(f"Moved {file} to {destination_directory}\n")
                            break

                except Exception as e:
                    # Per-file boundary: report and carry on with the next file.
                    self._log(f"Error processing PDF: {file}\n")
                    print(e)
119+
120+
if __name__ == "__main__":
    # Script entry point: create the Tk root window, attach the sorter UI to
    # it, then hand control to the Tk event loop until the window is closed.
    window = tk.Tk()
    sorter = PDFSorterGUI(window)
    window.mainloop()

0 commit comments

Comments
 (0)