Skip to content

Commit 2dc7bfb

Browse files
lpozo, KateFinegan, gahjelle
authored
Update sample code for the PDF article (#389)
* Update sample code for the PDF article
* TR updates, first round
* Language edit
* README LE
* Final QA

---------

Co-authored-by: KateFinegan <[email protected]>
Co-authored-by: gahjelle <[email protected]>
1 parent 6889335 commit 2dc7bfb

File tree

7 files changed

+123
-139
lines changed

7 files changed

+123
-139
lines changed

creating-and-modifying-pdfs/README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,9 @@ This folder contains resources and materials for Real Python's [Creating and Mod
44

55
There are two subfolders in this folder:
66

7-
1. **`practice_files/`:** Contains the sample PDFs used in the chapter
8-
2. **`source_code/`:** Contains source code from the chapter
7+
1. **`practice_files/`:** Contains the sample PDFs used in the tutorial
8+
2. **`source_code/`:** Contains source code from the tutorial
99

10-
The source code files are organized by section of the article, and the start of each subsection is indicated with comments.
10+
The source code files are organized by section of the tutorial, and the start of each subsection is indicated with comments.
1111

1212
The content of the companion tutorial was adapted from the "Creating and Modifying PDF Files" chapter of the book [*Python Basics: A Practical Introduction to Python 3*](https://realpython.com/products/python-basics-book/). If you enjoy this tutorial, check out the full book!
Lines changed: 24 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,65 +1,66 @@
11
# ---------------
2-
# Open a PDF File
2+
# Opening a PDF File
33
# ---------------
44

5-
from PyPDF2 import PdfFileReader
6-
7-
# You might need to change this to match the path on your computer
85
from pathlib import Path
96

7+
from pypdf import PdfReader
8+
109
pdf_path = (
1110
Path.home()
1211
/ "creating-and-modifying-pdfs"
1312
/ "practice_files"
1413
/ "Pride_and_Prejudice.pdf"
1514
)
1615

17-
pdf = PdfFileReader(str(pdf_path))
16+
pdf_reader = PdfReader(pdf_path)
1817

19-
print(pdf.getNumPages())
18+
print(len(pdf_reader.pages))
2019

21-
print(pdf.documentInfo)
20+
print(pdf_reader.metadata)
2221

23-
print(pdf.documentInfo.title)
22+
print(pdf_reader.metadata.title)
2423

2524

2625
# ---------------------------
2726
# Extracting Text From a Page
2827
# ---------------------------
2928

30-
first_page = pdf.getPage(0)
29+
first_page = pdf_reader.pages[0]
3130

3231
print(type(first_page))
3332

34-
print(first_page.extractText())
33+
print(first_page.extract_text())
3534

36-
for page in pdf.pages:
37-
print(page.extractText())
35+
for page in pdf_reader.pages:
36+
print(page.extract_text())
3837

3938

4039
# -----------------------
4140
# Putting It All Together
4241
# -----------------------
4342

4443
from pathlib import Path # noqa
45-
from PyPDF2 import PdfFileReader # noqa
44+
45+
from pypdf import PdfReader # noqa
4646

4747
# Change the path below to the correct path for your computer.
4848
pdf_path = (
4949
Path.home()
5050
/ "creating-and-modifying-pdfs"
51-
/ "practice-files"
51+
/ "practice_files"
5252
/ "Pride_and_Prejudice.pdf"
5353
)
5454

55-
pdf_reader = PdfFileReader(str(pdf_path))
56-
output_file_path = Path.home() / "Pride_and_Prejudice.txt"
55+
pdf_reader = PdfReader(pdf_path)
56+
txt_file = Path.home() / "Pride_and_Prejudice.txt"
57+
58+
content = [
59+
f"{pdf_reader.metadata.title}",
60+
f"Number of pages: {len(pdf_reader.pages)}",
61+
]
5762

58-
with output_file_path.open(mode="w") as output_file:
59-
title = pdf_reader.documentInfo.title
60-
num_pages = pdf_reader.getNumPages()
61-
output_file.write(f"{title}\\nNumber of pages: {num_pages}\\n\\n")
63+
for page in pdf_reader.pages:
64+
content.append(page.extract_text())
6265

63-
for page in pdf_reader.pages:
64-
text = page.extractText()
65-
output_file.write(text)
66+
txt_file.write_text("\n".join(content))
Lines changed: 20 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,26 @@
11
# -----------------------------
2-
# Using the PdfFileWriter Class
2+
# Using the PdfWriter Class
33
# -----------------------------
44

5-
from PyPDF2 import PdfFileWriter
5+
from pypdf import PdfWriter
66

7-
pdf_writer = PdfFileWriter()
7+
output_pdf = PdfWriter()
88

9-
page = pdf_writer.addBlankPage(width=72, height=72)
9+
page = output_pdf.add_blank_page(width=8.27 * 72, height=11.7 * 72)
1010

1111
print(type(page))
1212

1313
from pathlib import Path # noqa
1414

15-
with Path("blank.pdf").open(mode="wb") as output_file:
16-
pdf_writer.write(output_file)
17-
15+
output_pdf.write("blank.pdf")
1816

1917
# -----------------------------------
2018
# Extracting a Single Page From a PDF
2119
# -----------------------------------
2220

2321
from pathlib import Path # noqa
24-
from PyPDF2 import PdfFileReader, PdfFileWriter # noqa
22+
23+
from pypdf import PdfReader, PdfWriter # noqa
2524

2625
# Change the path to work on your computer if necessary
2726
pdf_path = (
@@ -30,43 +29,37 @@
3029
/ "practice_files"
3130
/ "Pride_and_Prejudice.pdf"
3231
)
33-
input_pdf = PdfFileReader(str(pdf_path))
32+
input_pdf = PdfReader(pdf_path)
3433

35-
first_page = input_pdf.getPage(0)
34+
first_page = input_pdf.pages[0]
3635

37-
pdf_writer = PdfFileWriter()
38-
pdf_writer.addPage(first_page)
36+
output_pdf = PdfWriter()
37+
output_pdf.add_page(first_page)
3938

40-
with Path("first_page.pdf").open(mode="wb") as output_file:
41-
pdf_writer.write(output_file)
39+
output_pdf.write("first_page.pdf")
4240

4341

4442
# ------------------------------------
4543
# Extracting Multiple Pages From a PDF
4644
# ------------------------------------
4745

48-
from PyPDF2 import PdfFileReader, PdfFileWriter # noqa
4946
from pathlib import Path # noqa
5047

48+
from pypdf import PdfReader, PdfWriter # noqa
49+
5150
pdf_path = (
5251
Path.home()
5352
/ "creating-and-modifying-pdfs"
5453
/ "practice_files"
5554
/ "Pride_and_Prejudice.pdf"
5655
)
57-
input_pdf = PdfFileReader(str(pdf_path))
56+
input_pdf = PdfReader(pdf_path)
5857

59-
pdf_writer = PdfFileWriter()
60-
for n in range(1, 4):
61-
page = input_pdf.getPage(n)
62-
pdf_writer.addPage(page)
63-
64-
print(pdf_writer.getNumPages())
65-
66-
pdf_writer = PdfFileWriter()
58+
output_pdf = PdfWriter()
6759

6860
for page in input_pdf.pages[1:4]:
69-
pdf_writer.addPage(page)
61+
output_pdf.add_page(page)
62+
63+
print(len(output_pdf.pages))
7064

71-
with Path("chapter1_slice.pdf").open(mode="wb") as output_file:
72-
pdf_writer.write(output_file)
65+
output_pdf.write("chapter1.pdf")

creating-and-modifying-pdfs/source_code/03-concatenating-and-merging-pdfs.py

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@
22
# Using the PdfMerger Class
33
# -----------------------------
44

5-
from PyPDF2 import PdfFileMerger
5+
from pypdf import PdfMerger
66

7-
pdf_merger = PdfFileMerger()
7+
pdf_merger = PdfMerger()
88

99
# ---------------------------------
1010
# Concatenating PDFs With .append()
@@ -22,25 +22,26 @@
2222
for path in reports_dir.glob("*.pdf"):
2323
print(path.name)
2424

25-
expense_reports = list(reports_dir.glob("*.pdf"))
26-
expense_reports.sort()
25+
expense_reports = sorted(reports_dir.glob("*.pdf"))
2726

2827
for path in expense_reports:
2928
print(path.name)
3029

30+
pdf_merger = PdfMerger()
31+
3132
for path in expense_reports:
32-
pdf_merger.append(str(path))
33+
pdf_merger.append(path)
3334

34-
with Path("expense_reports.pdf").open(mode="wb") as output_file:
35-
pdf_merger.write(output_file)
35+
pdf_merger.write("expense_reports.pdf")
3636

3737

3838
# --------------------------
3939
# Merging PDFs With .merge()
4040
# --------------------------
4141

4242
from pathlib import Path # noqa
43-
from PyPDF2 import PdfFileMerger # noqa
43+
44+
from pypdf import PdfMerger # noqa
4445

4546
report_dir = (
4647
Path.home()
@@ -52,10 +53,9 @@
5253
report_path = report_dir / "report.pdf"
5354
toc_path = report_dir / "toc.pdf"
5455

55-
pdf_merger = PdfFileMerger()
56-
pdf_merger.append(str(report_path))
56+
pdf_merger = PdfMerger()
57+
pdf_merger.append(report_path)
5758

58-
pdf_merger.merge(1, str(toc_path))
59+
pdf_merger.merge(1, toc_path)
5960

60-
with Path("full_report.pdf").open(mode="wb") as output_file:
61-
pdf_merger.write(output_file)
61+
pdf_merger.write("full_report.pdf")

0 commit comments

Comments
 (0)