Skip to content

Commit 9c0d365

Browse files
committed
Add article source code files
1 parent 1e0b8bc commit 9c0d365

File tree

7 files changed

+352
-2
lines changed

7 files changed

+352
-2
lines changed

creating-and-modifying-pdfs/README.md

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,5 +7,4 @@ There are two subfolders in this folder:
77
1. **`practice_files/`:** Contains the sample PDFs used in the chapter
88
2. **`source_code/`:** Contains source code from the chapter
99

10-
TODO:
11-
- [ ] Add source code files
10+
The source code files are organized by section of the article, and the start of each subsection is indicated with comments.
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
2+
# ---------------
# Open a PDF File
# ---------------

from pathlib import Path

from PyPDF2 import PdfFileReader

# Location of the sample PDF. You might need to change the folder names
# below to match the path on your computer.
practice_dir = Path.home() / "creating-and-modifying-pdfs" / "practice_files"
pdf_path = practice_dir / "Pride_and_Prejudice.pdf"

# The reader is handed the path as a plain string.
pdf = PdfFileReader(str(pdf_path))

# Print the page count, then the documentInfo object, then only its title.
print(pdf.getNumPages())

doc_info = pdf.documentInfo
print(doc_info)
print(doc_info.title)
24+
25+
26+
# ---------------------------
# Extracting Text From a Page
# ---------------------------

# `pdf` is the PdfFileReader opened in the "Open a PDF File" section.
first_page = pdf.getPage(0)

# Show the type of the page object, then the text extracted from it.
print(type(first_page))

first_page_text = first_page.extractText()
print(first_page_text)

# Do the same extraction for every page of the document, in order.
for current_page in pdf.pages:
    page_text = current_page.extractText()
    print(page_text)
38+
39+
40+
# -----------------------
# Putting It All Together
# -----------------------

from pathlib import Path

from PyPDF2 import PdfFileReader

# Change the path below to the correct path for your computer.
# The sample folder is named "practice_files" (with an underscore), the same
# name the README and the other examples in this chapter use.
pdf_path = (
    Path.home()
    / "creating-and-modifying-pdfs"
    / "practice_files"
    / "Pride_and_Prejudice.pdf"
)

pdf_reader = PdfFileReader(str(pdf_path))
output_file_path = Path.home() / "Pride_and_Prejudice.txt"

# An explicit encoding keeps the write from depending on the platform's
# default text encoding.
with output_file_path.open(mode="w", encoding="utf-8") as output_file:
    title = pdf_reader.documentInfo.title
    num_pages = pdf_reader.getNumPages()
    # Header: title on the first line, page count on the second, then a
    # blank line. "\n" must be a single backslash so real newlines are
    # written rather than the two characters backslash and "n".
    output_file.write(f"{title}\nNumber of pages: {num_pages}\n\n")

    # Append the extracted text of every page, in document order.
    for page in pdf_reader.pages:
        text = page.extractText()
        output_file.write(text)
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
# -----------------------------
# Using the PdfFileWriter Class
# -----------------------------

from pathlib import Path

from PyPDF2 import PdfFileWriter

# Start from an empty writer and add a single blank page to it
# (width 72, height 72).
pdf_writer = PdfFileWriter()
page = pdf_writer.addBlankPage(width=72, height=72)

# Show the type of the object that addBlankPage() returned.
print(type(page))

# Write the one-page document to blank.pdf (a relative path, so it lands in
# the current working directory). The file is opened in binary mode.
blank_pdf_path = Path("blank.pdf")
with blank_pdf_path.open(mode="wb") as output_file:
    pdf_writer.write(output_file)
15+
16+
17+
# -----------------------------------
# Extracting a Single Page From a PDF
# -----------------------------------

from pathlib import Path

from PyPDF2 import PdfFileReader, PdfFileWriter

# Change the path to work on your computer if necessary
pdf_path = Path.home().joinpath(
    "creating-and-modifying-pdfs",
    "practice_files",
    "Pride_and_Prejudice.pdf",
)
input_pdf = PdfFileReader(str(pdf_path))

# Take the page at index 0 from the reader and add it to a new writer.
first_page = input_pdf.getPage(0)
pdf_writer = PdfFileWriter()
pdf_writer.addPage(first_page)

# Save the writer's single page as first_page.pdf (binary mode).
single_page_path = Path("first_page.pdf")
with single_page_path.open(mode="wb") as output_file:
    pdf_writer.write(output_file)
40+
41+
42+
# ------------------------------------
# Extracting Multiple Pages From a PDF
# ------------------------------------

from pathlib import Path

from PyPDF2 import PdfFileReader, PdfFileWriter

pdf_path = Path.home().joinpath(
    "creating-and-modifying-pdfs",
    "practice_files",
    "Pride_and_Prejudice.pdf",
)
input_pdf = PdfFileReader(str(pdf_path))

# First approach: fetch the pages at indices 1, 2 and 3 one at a time.
pdf_writer = PdfFileWriter()
for page_index in (1, 2, 3):
    pdf_writer.addPage(input_pdf.getPage(page_index))

# Report how many pages the writer holds now.
print(pdf_writer.getNumPages())

# Second approach: start over with a fresh writer and take the same index
# range as a slice of the reader's pages.
pdf_writer = PdfFileWriter()

selected_pages = input_pdf.pages[1:4]
for selected_page in selected_pages:
    pdf_writer.addPage(selected_page)

# Only the second writer is saved to disk.
slice_path = Path("chapter1_slice.pdf")
with slice_path.open(mode="wb") as output_file:
    pdf_writer.write(output_file)
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
# -----------------------------
# Using the PdfFileMerger Class
# -----------------------------

from pathlib import Path

from PyPDF2 import PdfFileMerger

pdf_merger = PdfFileMerger()

# ---------------------------------
# Concatenating PDFs With .append()
# ---------------------------------

reports_dir = Path.home().joinpath(
    "creating-and-modifying-pdfs",
    "practice_files",
    "expense_reports",
)

# List the PDF file names in whatever order glob() yields them.
for found_path in reports_dir.glob("*.pdf"):
    print(found_path.name)

# Collect the same paths again, this time sorted, and list them once more.
expense_reports = sorted(reports_dir.glob("*.pdf"))

for report_path in expense_reports:
    print(report_path.name)

# Append every report to the merger in sorted order.
for report_path in expense_reports:
    pdf_merger.append(str(report_path))

# Write the concatenated document to expense_reports.pdf (binary mode).
merged_path = Path("expense_reports.pdf")
with merged_path.open(mode="wb") as output_file:
    pdf_merger.write(output_file)
34+
35+
36+
# --------------------------
# Merging PDFs With .merge()
# --------------------------

from pathlib import Path

from PyPDF2 import PdfFileMerger

report_dir = Path.home().joinpath(
    "creating-and-modifying-pdfs",
    "practice_files",
    "quarterly_report",
)

# The two input files: the report itself and its table of contents.
report_path = report_dir.joinpath("report.pdf")
toc_path = report_dir.joinpath("toc.pdf")

# Load the whole report first.
pdf_merger = PdfFileMerger()
pdf_merger.append(str(report_path))

# Then insert toc.pdf at position 1 of what the merger already holds.
pdf_merger.merge(1, str(toc_path))

# Write the combined document to full_report.pdf (binary mode).
full_report_path = Path("full_report.pdf")
with full_report_path.open(mode="wb") as output_file:
    pdf_merger.write(output_file)
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
# --------------
# Rotating Pages
# --------------

from pathlib import Path

from PyPDF2 import PdfFileReader, PdfFileWriter

pdf_path = Path.home().joinpath(
    "creating-and-modifying-pdfs",
    "practice_files",
    "ugly.pdf",
)

pdf_reader = PdfFileReader(str(pdf_path))
pdf_writer = PdfFileWriter()

# First attempt: rotate every page with an even index by 90 degrees
# clockwise; pages with an odd index are copied unchanged.
for page_index, page in enumerate(pdf_reader.pages):
    if page_index % 2 == 0:
        page.rotateClockwise(90)
    pdf_writer.addPage(page)

rotated_path = Path("ugly_rotated.pdf")
with rotated_path.open(mode="wb") as output_file:
    pdf_writer.write(output_file)

# Re-open the file so the pages are read fresh, without the rotations
# applied above.
pdf_reader = PdfFileReader(str(pdf_path))

# Print the first page object itself.
print(pdf_reader.getPage(0))

# Print the "/Rotate" entry of page 0, of page 1, and of page 0 again.
page = pdf_reader.getPage(0)
print(page["/Rotate"])

page = pdf_reader.getPage(1)
print(page["/Rotate"])

page = pdf_reader.getPage(0)
print(page["/Rotate"])

# Rotate page 0 and print its "/Rotate" entry once more to see the change.
page.rotateClockwise(90)
print(page["/Rotate"])

# Second attempt: start over and rotate only the pages whose "/Rotate"
# entry equals -90.
pdf_reader = PdfFileReader(str(pdf_path))
pdf_writer = PdfFileWriter()

for page in pdf_reader.pages:
    needs_rotation = page["/Rotate"] == -90
    if needs_rotation:
        page.rotateClockwise(90)
    pdf_writer.addPage(page)

rotated2_path = Path("ugly_rotated2.pdf")
with rotated2_path.open(mode="wb") as output_file:
    pdf_writer.write(output_file)
53+
54+
55+
# --------------
# Cropping Pages
# --------------

import copy
from pathlib import Path

from PyPDF2 import PdfFileReader, PdfFileWriter

pdf_path = Path.home().joinpath(
    "creating-and-modifying-pdfs",
    "practice_files",
    "half_and_half.pdf",
)

pdf_reader = PdfFileReader(str(pdf_path))
first_page = pdf_reader.getPage(0)

# Print the media box, then each of its four corners.
print(first_page.mediaBox)
for corner_name in ("lowerLeft", "lowerRight", "upperLeft", "upperRight"):
    print(getattr(first_page.mediaBox, corner_name))

# Print the two components of the upper-right corner separately.
upper_right = first_page.mediaBox.upperRight
print(upper_right[0])
print(upper_right[1])

# Move the upper-left corner to (0, 480) and print both upper corners again.
first_page.mediaBox.upperLeft = (0, 480)
print(first_page.mediaBox.upperLeft)
print(first_page.mediaBox.upperRight)

# Save the page with the modified media box.
pdf_writer = PdfFileWriter()
pdf_writer.addPage(first_page)
cropped_page_path = Path("cropped_page.pdf")
with cropped_page_path.open(mode="wb") as output_file:
    pdf_writer.write(output_file)

# Start over from a freshly read page to split it into two halves.
pdf_reader = PdfFileReader(str(pdf_path))
pdf_writer = PdfFileWriter()

first_page = pdf_reader.getPage(0)

# Each half is changed on its own deep copy of the page, not on the page
# object itself.
left_side = copy.deepcopy(first_page)
current_coords = left_side.mediaBox.upperRight
# Halve the first component of the upper-right corner, keep the second.
new_coords = (current_coords[0] / 2, current_coords[1])
left_side.mediaBox.upperRight = new_coords

# The same point becomes the upper-left corner of the right-hand copy.
right_side = copy.deepcopy(first_page)
right_side.mediaBox.upperLeft = new_coords

# Write the left half followed by the right half.
for half_page in (left_side, right_side):
    pdf_writer.addPage(half_page)
cropped_pages_path = Path("cropped_pages.pdf")
with cropped_pages_path.open(mode="wb") as output_file:
    pdf_writer.write(output_file)
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
# ---------------
# Encrypting PDFs
# ---------------

from pathlib import Path

from PyPDF2 import PdfFileReader, PdfFileWriter

home_dir = Path.home()
pdf_path = home_dir.joinpath(
    "creating-and-modifying-pdfs",
    "practice_files",
    "newsletter.pdf",
)

pdf_reader = PdfFileReader(str(pdf_path))

# Copy every page of the newsletter into a new writer.
pdf_writer = PdfFileWriter()
pdf_writer.appendPagesFromReader(pdf_reader)

# Protect the document with a user password only.
pdf_writer.encrypt(user_pwd="SuperSecret")

# Save the protected copy in the home directory.
output_path = home_dir / "newsletter_protected.pdf"
with output_path.open(mode="wb") as output_file:
    pdf_writer.write(output_file)

# Variant with separate user and owner passwords. This call comes after the
# file above has been written, and nothing is written after it.
user_pwd = "SuperSecret"
owner_pwd = "ReallySuperSecret"
pdf_writer.encrypt(user_pwd=user_pwd, owner_pwd=owner_pwd)
29+
30+
31+
# ---------------
# Decrypting PDFs
# ---------------

from pathlib import Path

from PyPDF2 import PdfFileReader, PdfFileWriter

# The password-protected file saved to the home directory by the
# "Encrypting PDFs" section.
pdf_path = Path.home().joinpath("newsletter_protected.pdf")

pdf_reader = PdfFileReader(str(pdf_path))

# Reading a page before decrypting is shown here on purpose.
locked_page = pdf_reader.getPage(0)  # Raises PdfReadError
print(locked_page)

# Decrypt with the user password and print what decrypt() returns.
decrypt_result = pdf_reader.decrypt(password="SuperSecret")
print(decrypt_result)

# The same page read again, now after decrypt() has been called.
print(pdf_reader.getPage(0))
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# ----------------------
2+
# Using the Canvas Class
3+
# ----------------------
4+
5+
from reportlab.pdfgen.canvas import Canvas

0 commit comments

Comments
 (0)