Skip to content

Commit dcf997a

Browse files
authored
BUG: Search /DA in hierarchy fields (#2002)
Closes #1997
1 parent 524ddf9 commit dcf997a

File tree

2 files changed

+80
-9
lines changed

2 files changed

+80
-9
lines changed

pypdf/_writer.py

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@
8787
from .constants import PageAttributes as PG
8888
from .constants import PagesAttributes as PA
8989
from .constants import TrailerKeys as TK
90+
from .errors import PyPdfError
9091
from .generic import (
9192
PAGE_FIT,
9293
AnnotationBuilder,
@@ -836,11 +837,14 @@ def _update_text_field(self, field: DictionaryObject) -> None:
836837
rct = RectangleObject((0, 0, _rct[2] - _rct[0], _rct[3] - _rct[1]))
837838

838839
# Extract font information
839-
font_properties: Any = (
840-
cast(str, field[AA.DA]).replace("\n", " ").replace("\r", " ").split(" ")
841-
)
840+
da = cast(str, field[AA.DA])
841+
font_properties = da.replace("\n", " ").replace("\r", " ").split(" ")
842842
font_name = font_properties[font_properties.index("Tf") - 2]
843843
font_height = float(font_properties[font_properties.index("Tf") - 1])
844+
if font_height == 0:
845+
font_height = rct.height - 2
846+
font_properties[font_properties.index("Tf") - 1] = str(font_height)
847+
da = " ".join(font_properties)
844848
y_offset = rct.height - 1 - font_height
845849

846850
# Retrieve field text and selected values
@@ -855,7 +859,7 @@ def _update_text_field(self, field: DictionaryObject) -> None:
855859
sel = []
856860

857861
# Generate appearance stream
858-
ap_stream = f"q\n/Tx BMC \nq\n1 1 {rct.width - 1} {rct.height - 1} re\nW\nBT\n{field[AA.DA]}\n".encode()
862+
ap_stream = f"q\n/Tx BMC \nq\n1 1 {rct.width - 1} {rct.height - 1} re\nW\nBT\n{da}\n".encode()
859863
for line_number, line in enumerate(txt.replace("\n", "\r").split("\r")):
860864
if line in sel:
861865
# may be improved, but could not find how to get fill working => replaced with a lined box
@@ -938,12 +942,21 @@ def update_page_form_field_values(
938942
auto_regenerate: set/unset the need_appearances flag;
939943
the flag is unchanged if auto_regenerate is None
940944
"""
945+
if CatalogDictionary.ACRO_FORM not in self._root_object:
946+
raise PyPdfError("No /AcroForm dictionary in PdfWriter Object")
947+
af = cast(DictionaryObject, self._root_object[CatalogDictionary.ACRO_FORM])
948+
if InteractiveFormDictEntries.Fields not in af:
949+
raise PyPdfError("No /Fields dictionary in Pdf in PdfWriter Object")
941950
if isinstance(auto_regenerate, bool):
942951
self.set_need_appearances_writer(auto_regenerate)
943952
# Iterate through pages, update field values
944953
if PG.ANNOTS not in page:
945954
logger_warning("No fields to update on this page", __name__)
946955
return
956+
# /Helvetica is only a last-resort fallback; it is normally insufficient because the matching font resource is missing
957+
default_da = af.get(
958+
InteractiveFormDictEntries.DA, TextStringObject("/Helvetica 0 Tf 0 g")
959+
)
947960
for writer_annot in page[PG.ANNOTS]: # type: ignore
948961
writer_annot = cast(DictionaryObject, writer_annot.get_object())
949962
# retrieve parent field values, if present
@@ -968,6 +981,17 @@ def update_page_form_field_values(
968981
or writer_annot.get(FA.FT) == "/Ch"
969982
):
970983
# textbox
984+
if AA.DA not in writer_annot:
985+
f = writer_annot
986+
da = default_da
987+
while AA.DA not in f:
988+
f = f.get("/Parent")
989+
if f is None:
990+
break
991+
f = f.get_object()
992+
if AA.DA in f:
993+
da = f[AA.DA]
994+
writer_annot[NameObject(AA.DA)] = da
971995
self._update_text_field(writer_annot)
972996
elif writer_annot.get(FA.FT) == "/Sig":
973997
# signature

tests/test_writer.py

Lines changed: 52 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
PdfWriter,
1414
Transformation,
1515
)
16-
from pypdf.errors import DeprecationError, PageSizeNotDefinedError
16+
from pypdf.errors import DeprecationError, PageSizeNotDefinedError, PyPdfError
1717
from pypdf.generic import (
1818
ArrayObject,
1919
ContentStream,
@@ -437,10 +437,8 @@ def test_fill_form(pdf_file_path):
437437
reader = PdfReader(RESOURCE_ROOT / "form.pdf")
438438
writer = PdfWriter()
439439

440-
page = reader.pages[0]
441-
442-
writer.add_page(page)
443-
writer.add_page(PdfReader(RESOURCE_ROOT / "crazyones.pdf").pages[0])
440+
writer.append(reader, [0])
441+
writer.append(RESOURCE_ROOT / "crazyones.pdf", [0])
444442

445443
writer.update_page_form_field_values(
446444
writer.pages[0], {"foo": "some filled in text"}, flags=1
@@ -1535,3 +1533,52 @@ def test_watermark():
15351533
b = BytesIO()
15361534
writer.write(b)
15371535
assert len(b.getvalue()) < 2.1 * 1024 * 1024
1536+
1537+
1538+
@pytest.mark.enable_socket()
1539+
def test_da_missing_in_annot():
1540+
url = "https://github.com/py-pdf/pypdf/files/12136285/Building.Division.Permit.Application.pdf"
1541+
name = "BuildingDivisionPermitApplication.pdf"
1542+
reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name)))
1543+
writer = PdfWriter(clone_from=reader)
1544+
writer.update_page_form_field_values(
1545+
writer.pages[0], {"PCN-1": "0"}, auto_regenerate=False
1546+
)
1547+
b = BytesIO()
1548+
writer.write(b)
1549+
reader = PdfReader(BytesIO(b.getvalue()))
1550+
ff = reader.get_fields()
1551+
# check for autosize processing
1552+
assert (
1553+
b"0 Tf"
1554+
not in ff["PCN-1"].indirect_reference.get_object()["/AP"]["/N"].get_data()
1555+
)
1556+
f2 = writer.get_object(ff["PCN-2"].indirect_reference.idnum)
1557+
f2[NameObject("/Parent")] = writer.get_object(
1558+
ff["PCN-1"].indirect_reference.idnum
1559+
).indirect_reference
1560+
writer.update_page_form_field_values(
1561+
writer.pages[0], {"PCN-2": "1"}, auto_regenerate=False
1562+
)
1563+
1564+
1565+
def test_missing_fields(pdf_file_path):
1566+
reader = PdfReader(RESOURCE_ROOT / "form.pdf")
1567+
1568+
writer = PdfWriter()
1569+
writer.add_page(reader.pages[0])
1570+
1571+
with pytest.raises(PyPdfError) as exc:
1572+
writer.update_page_form_field_values(
1573+
writer.pages[0], {"foo": "some filled in text"}, flags=1
1574+
)
1575+
assert exc.value.args[0] == "No /AcroForm dictionary in PdfWriter Object"
1576+
1577+
writer = PdfWriter()
1578+
writer.append(reader, [0])
1579+
del writer._root_object["/AcroForm"]["/Fields"]
1580+
with pytest.raises(PyPdfError) as exc:
1581+
writer.update_page_form_field_values(
1582+
writer.pages[0], {"foo": "some filled in text"}, flags=1
1583+
)
1584+
assert exc.value.args[0] == "No /Fields dictionary in Pdf in PdfWriter Object"

0 commit comments

Comments
 (0)