Skip to content

Commit fe016da

Browse files
committed
ENH: _writer: Add method to scale and wrap text
This patch adds a function, scale_text, that scales text to fit a form field and, when the text is allowed to wrap, also wraps it into lines. It takes the text string itself, the field width and height, the starting font size, a font descriptor (which supplies the character widths), and a bool specifying whether or not the text is allowed to wrap. It returns a list of lines and the font size at which they were fitted.
1 parent a2e940c commit fe016da

File tree

2 files changed

+141
-0
lines changed

2 files changed

+141
-0
lines changed

pypdf/_writer.py

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3432,6 +3432,107 @@ def calculate_text_width(character_widths: dict[str, int], font_size: float, txt
34323432
return (total_font_units_width * font_size) / 1000.0
34333433

34343434

3435+
def _wrap_text_to_width(
    font_descriptor: FontDescriptor,
    font_size: float,
    field_width: float,
    txt: str,
) -> list[str]:
    """
    Break txt into lines that fit within field_width at the given font_size.

    Newlines and carriage returns both start a new paragraph. A paragraph
    that is empty or whitespace-only becomes one empty line. Words are split
    on single spaces and are never broken: a word that is wider than the
    field is placed on a line of its own.

    Args:
        font_descriptor: A FontDescriptor for the font to be used.
        font_size: The font size in points.
        field_width: The width available for each line.
        txt: The text to wrap.

    Returns:
        The wrapped lines, in order.
    """
    character_widths = font_descriptor.character_widths
    wrapped_lines: list[str] = []
    for paragraph in txt.replace("\n", "\r").split("\r"):
        if not paragraph.strip():
            wrapped_lines.append("")
            continue
        current_line_words: list[str] = []
        current_line_width: float = 0
        for word in paragraph.split(" "):
            word_width = calculate_text_width(character_widths, font_size, word)
            if not current_line_words:
                if word_width > field_width:
                    # The word alone overflows the field: it gets its own
                    # line and the next word starts on a fresh line.
                    wrapped_lines.append(word)
                else:
                    current_line_words = [word]
                    current_line_width = word_width
                continue
            # A separating space is only needed when the line already has words.
            space_width = calculate_text_width(character_widths, font_size, " ")
            test_width = current_line_width + space_width + word_width
            if test_width > field_width:
                wrapped_lines.append(" ".join(current_line_words))
                current_line_words = [word]
                current_line_width = word_width
            else:
                current_line_words.append(word)
                current_line_width = test_width
        if current_line_words:
            wrapped_lines.append(" ".join(current_line_words))
    return wrapped_lines


def scale_text(
    font_descriptor: FontDescriptor,
    font_size: float,
    field_width: float,
    field_height: float,
    txt: str,
    wrap_lines: bool,
    min_font_size: float = 4.0,  # Minimum font size to attempt
    font_size_step: float = 0.2  # How much to decrease font size by each step
) -> tuple[list[str], float]:
    """
    Scale a piece of text so that it fits a field of field_width by field_height.

    Starting at font_size, the font size is decreased by font_size_step until
    the text fits or until the next step would fall below min_font_size. If
    wrap_lines is False, the text is kept on a single line and must fit both
    the field width and (by font size) the field height. If wrap_lines is
    True, the text is wrapped to the field width and only its estimated total
    height, assuming a line spacing of 1.4, is compared with the field height.

    The search is iterative, so a large font_size or a small font_size_step
    cannot exhaust the interpreter's recursion limit. A font_size_step that
    is zero or negative can never shrink the text, so scaling gives up
    immediately and the text is returned at the given font_size.

    Args:
        font_descriptor: A FontDescriptor for the font to be used.
        font_size: The font size in points to start from.
        field_width: The width of the field in which to fit the text.
        field_height: The height of the field in which to fit the text.
        txt: The text to fit with the field.
        wrap_lines: Whether to scale and wrap the text, or only to scale.
        min_font_size: The minimum font size at which to scale the text.
        font_size_step: The amount by which to decrement font size per step while scaling.

    Returns:
        A tuple of the list of lines (a single element when wrap_lines is
        False) and the font size, rounded to one decimal, at which they were
        laid out. If the text still overflows at the smallest font size
        tried, that last font size is returned anyway.
    """
    while True:
        if wrap_lines:
            lines = _wrap_text_to_width(font_descriptor, font_size, field_width, txt)
            # Estimate total height, assuming a line spacing of 1.4.
            estimated_total_height = font_size + (len(lines) - 1) * 1.4 * font_size
            overflows = estimated_total_height > field_height
        else:
            lines = [txt]
            text_width = calculate_text_width(font_descriptor.character_widths, font_size, txt)
            overflows = text_width > field_width or font_size > field_height
        if not overflows:
            break
        new_font_size = font_size - font_size_step
        if font_size_step > 0 and new_font_size >= min_font_size:
            # Text overflows the field; retry with a smaller font size.
            font_size = new_font_size
            continue
        # The next font size would drop below the minimum (or the step cannot
        # shrink the text at all): give up and keep the current size.
        break
    return lines, round(font_size, 1)
3534+
3535+
34353536
def generate_appearance_stream(
34363537
txt: str,
34373538
sel: list[str],

tests/test_writer.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919
PdfWriter,
2020
Transformation,
2121
)
22+
from pypdf._font import FontDescriptor
23+
from pypdf._writer import scale_text
2224
from pypdf.annotations import Link
2325
from pypdf.errors import DeprecationError, PageSizeNotDefinedError, PyPdfError
2426
from pypdf.generic import (
@@ -1650,6 +1652,44 @@ def test_merge_content_stream_to_page():
16501652
assert contents[1].get_object().get_data() == more_content
16511653

16521654

1655+
def test_scale_text():
    """Exercise scale_text for single-line scaling, wrapping, and the minimum font size."""
    helvetica = FontDescriptor.from_font_resource(
        DictionaryObject({NameObject("/BaseFont"): NameObject("/Helvetica")})
    )

    # Single line: the text is only scaled, never wrapped.
    width, height, start_size = 55.4, 9.1, 10.1
    _, fitted_size = scale_text(helvetica, start_size, width - 3, height - 3, "Hello World", False)
    assert fitted_size == 5.9

    long_sentence = "This is a very very long sentence that probably will scale below the minimum font size"
    _, fitted_size = scale_text(helvetica, start_size, width - 3, height - 3, long_sentence, False)
    assert fitted_size == 4.1

    # Multiline: the text is wrapped to the field width and scaled to its height.
    width, height, start_size = 160, 360, 12.0
    readme = """Welcome to pypdf
pypdf is a free and open source pure-python PDF library capable of splitting, merging, cropping, and
transforming the pages of PDF files. It can also add custom data, viewing options, and passwords to PDF
files. pypdf can retrieve text and metadata from PDFs as well.

See pdfly for a CLI application that uses pypdf to interact with PDFs.
"""
    wrapped, fitted_size = scale_text(helvetica, start_size, width - 3, height - 3, readme, True)
    assert fitted_size == 12.0
    assert wrapped[1] == "pypdf is a free and open"

    # A shorter field forces the font size down.
    height = 160
    _, fitted_size = scale_text(helvetica, start_size, width - 3, height - 3, readme, True)
    assert fitted_size == 8.8

    # A field far too short for the text ends at the minimum font size.
    height = 12
    _, fitted_size = scale_text(helvetica, start_size, width - 3, height - 3, readme, True)
    assert fitted_size == 4.0

    # A single long word in the same short field.
    long_word = "pneumonoultramicroscopicsilicovolcanoconiosis"
    _, fitted_size = scale_text(helvetica, start_size, width - 3, height - 3, long_word, True)
    assert fitted_size == 8.8
1691+
1692+
16531693
@pytest.mark.enable_socket
16541694
def test_update_form_fields2():
16551695
my_files = {

0 commit comments

Comments
 (0)