Skip to content
6 changes: 3 additions & 3 deletions pypdf/_cmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -461,9 +461,9 @@ def build_font_width_map(
m = 0
cpt = 0
for xx in w:
xx = xx.get_object()
if xx > 0:
m += xx
xx_value = xx.get_object()
if xx_value > 0:
m += xx_value
cpt += 1
font_width_map["default"] = m / max(1, cpt)
st = cast(int, ft["/FirstChar"])
Expand Down
28 changes: 14 additions & 14 deletions pypdf/_doc_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -391,8 +391,8 @@ def recursive_call(
return top, -1
return None, mi + ma
for idx, kid in enumerate(cast(ArrayObject, node["/Kids"])):
kid = cast(DictionaryObject, kid.get_object())
n, i = recursive_call(kid, mi)
kid_object = cast(DictionaryObject, kid.get_object())
n, i = recursive_call(kid_object, mi)
if n is not None: # page has just been found ...
if i < 0: # ... just below!
return node, idx
Expand Down Expand Up @@ -612,8 +612,8 @@ def _build_field(
states: list[str] = []
retval[key][NameObject("/_States_")] = ArrayObject(states)
for k in obj.get(FA.Kids, {}):
k = k.get_object()
for s in list(k["/AP"]["/N"].keys()):
k_object = k.get_object()
for s in list(k_object["/AP"]["/N"].keys()):
if s not in states:
states.append(s)
retval[key][NameObject("/_States_")] = ArrayObject(states)
Expand Down Expand Up @@ -641,8 +641,8 @@ def _check_kids(
if PagesAttributes.KIDS in tree:
# recurse down the tree
for kid in tree[PagesAttributes.KIDS]: # type: ignore
kid = kid.get_object()
self.get_fields(kid, retval, fileobj, stack)
kid_object = kid.get_object()
self.get_fields(kid_object, retval, fileobj, stack)

def _write_field(self, fileobj: Any, field: Any, field_attributes: Any) -> None:
field_attributes_tuple = FA.attributes()
Expand Down Expand Up @@ -771,16 +771,16 @@ def _get_inherited(obj: DictionaryObject, key: str) -> Any:
else:
kids = field.get("/Kids", ())
for k in kids:
k = k.get_object()
if (k.get("/Subtype", "") == "/Widget") and ("/T" not in k):
k_object = k.get_object()
if (k_object.get("/Subtype", "") == "/Widget") and ("/T" not in k_object):
# Kid that is just a widget, not a field:
if "/P" in k:
ret += [k["/P"].get_object()]
if "/P" in k_object:
ret += [k_object["/P"].get_object()]
else:
ret += [
p
for p in self.pages
if k.indirect_reference in p.get("/Annots", "")
if k_object.indirect_reference in p.get("/Annots", "")
]
return [
x
Expand Down Expand Up @@ -1317,9 +1317,9 @@ def xfa(self) -> Optional[dict[str, Any]]:
i = iter(fields)
for f in i:
tag = f
f = next(i)
if isinstance(f, IndirectObject):
field = cast(Optional[EncodedStreamObject], f.get_object())
field_value = next(i)
if isinstance(field_value, IndirectObject):
field = cast(Optional[EncodedStreamObject], field_value.get_object())
if field:
es = zlib.decompress(field._data)
retval[tag] = es
Expand Down
25 changes: 14 additions & 11 deletions pypdf/_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -743,14 +743,17 @@ def _get_inline_images(self) -> dict[str, ImageFile]:
if k in {"/Length", "/L"}: # no length is expected
continue
if isinstance(v, list):
v = ArrayObject(
[self._translate_value_inline_image(k, x) for x in v]
value_for_init = cast(
PdfObject,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

`ArrayObject` is a `PdfObject`. If you want to annotate `value_for_init`, please use `value_for_init: PdfObject = ...` without the `cast`.

ArrayObject(
[self._translate_value_inline_image(k, x) for x in v]
),
)
else:
v = self._translate_value_inline_image(k, v)
k = NameObject(_INLINE_IMAGE_KEY_MAPPING[k])
if k not in init:
init[k] = v
value_for_init = self._translate_value_inline_image(k, v)
mapped_k = NameObject(_INLINE_IMAGE_KEY_MAPPING[k])
if mapped_k not in init:
init[mapped_k] = value_for_init
ii["object"] = EncodedStreamObject.initialize_from_dictionary(init)
extension, byte_stream, img = _xobj_to_image(ii["object"])
files[f"~{num}~"] = ImageFile(
Expand Down Expand Up @@ -1236,13 +1239,13 @@ def _merge_page_writer(
else:
trsf = Transformation(ctm)
for a in cast(ArrayObject, page2[PG.ANNOTS]):
a = a.get_object()
aa = a.clone(
annotation_object = a.get_object()
aa = annotation_object.clone(
pdf,
ignore_fields=("/P", "/StructParent", "/Parent"),
force_duplicate=True,
)
r = cast(ArrayObject, a["/Rect"])
r = cast(ArrayObject, annotation_object["/Rect"])
pt1 = trsf.apply_on((r[0], r[1]), True)
pt2 = trsf.apply_on((r[2], r[3]), True)
aa[NameObject("/Rect")] = ArrayObject(
Expand All @@ -1253,8 +1256,8 @@ def _merge_page_writer(
max(pt1[1], pt2[1]),
)
)
if "/QuadPoints" in a:
q = cast(ArrayObject, a["/QuadPoints"])
if "/QuadPoints" in annotation_object:
q = cast(ArrayObject, annotation_object["/QuadPoints"])
aa[NameObject("/QuadPoints")] = ArrayObject(
trsf.apply_on((q[0], q[1]), True)
+ trsf.apply_on((q[2], q[3]), True)
Expand Down
5 changes: 3 additions & 2 deletions pypdf/_text_extraction/_layout_mode/_fixed_width_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,13 +186,14 @@ def recurs_to_target_op(
tj_ops.append(text_state_mgr.text_state_params(operands[2]))
elif op in (b"Td", b"Tm", b"TD", b"T*"):
text_state_mgr.reset_trm()
operands_for_tm = operands
if op == b"Tm":
text_state_mgr.reset_tm()
elif op == b"TD":
text_state_mgr.set_state_param(b"TL", -operands[1])
elif op == b"T*":
operands = [0, -text_state_mgr.TL]
text_state_mgr.add_tm(operands)
operands_for_tm = [0, -text_state_mgr.TL]
text_state_mgr.add_tm(operands_for_tm)
elif op == b"Tf":
text_state_mgr.set_font(fonts[operands[0]], operands[1])
else: # handle Tc, Tw, Tz, TL, and Ts operators
Expand Down
17 changes: 10 additions & 7 deletions pypdf/_text_extraction/_layout_mode/_font.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,12 @@ def __post_init__(self) -> None:
for d_font_idx, d_font in enumerate(
self.font_dictionary["/DescendantFonts"]
):
while isinstance(d_font, IndirectObject):
d_font = d_font.get_object()
self.font_dictionary["/DescendantFonts"][d_font_idx] = d_font
if isinstance(d_font, IndirectObject):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We do not need this check. Calling d_font.get_object() should always work.

d_font_object = d_font.get_object()
else:
d_font_object = d_font
assert not isinstance(d_font_object, IndirectObject), d_font_object
self.font_dictionary["/DescendantFonts"][d_font_idx] = d_font_object
ord_map = {
ord(_target): _surrogate
for _target, _surrogate in self.char_map.items()
Expand All @@ -80,18 +83,18 @@ def __post_init__(self) -> None:
skip_count = 0
_w = d_font.get("/W", [])
for idx, w_entry in enumerate(_w):
w_entry = w_entry.get_object()
w_value = w_entry.get_object()
if skip_count:
skip_count -= 1
continue
if not isinstance(w_entry, (int, float)): # pragma: no cover
if not isinstance(w_value, (int, float)): # pragma: no cover
# We should never get here due to skip_count above. Add a
# warning and/or use reader's "strict" to force an exception?
continue
# check for format (1): `int [int int int int ...]`
w_next_entry = _w[idx + 1].get_object()
if isinstance(w_next_entry, Sequence):
start_idx, width_list = w_entry, w_next_entry
start_idx, width_list = w_value, w_next_entry
self.width_map.update(
{
ord_map[_cidx]: _width
Expand All @@ -112,7 +115,7 @@ def __post_init__(self) -> None:
_w[idx + 2].get_object(), (int, float)
):
start_idx, stop_idx, const_width = (
w_entry,
w_value,
w_next_entry,
_w[idx + 2].get_object(),
)
Expand Down
30 changes: 17 additions & 13 deletions pypdf/_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -982,8 +982,8 @@ def update_page_form_field_values(
if PG.ANNOTS not in page:
logger_warning("No fields to update on this page", __name__)
return
for annotation in page[PG.ANNOTS]: # type: ignore
annotation = cast(DictionaryObject, annotation.get_object())
for annotation_ref in page[PG.ANNOTS]: # type: ignore
annotation = cast(DictionaryObject, annotation_ref.get_object())
if annotation.get("/Subtype", "") != "/Widget":
continue
if "/FT" in annotation and "/T" in annotation:
Expand All @@ -1007,7 +1007,6 @@ def update_page_form_field_values(
del parent_annotation["/I"]
if flags:
annotation[NameObject(FA.Ff)] = NumberObject(flags)
# Set the field value
if not (value is None and flatten): # Only change values if given by user and not flattening.
if isinstance(value, list):
lst = ArrayObject(TextStringObject(v) for v in value)
Expand All @@ -1029,9 +1028,14 @@ def update_page_form_field_values(
if v not in normal_ap:
v = NameObject("/Off")
appearance_stream_obj = normal_ap.get(v)
# Other cases will be updated through the for loop
# other cases will be updated through the for loop
annotation[NameObject(AA.AS)] = v
annotation[NameObject(FA.V)] = v
if flatten and appearance_stream_obj is not None:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Where does this code originate from?

# We basically copy the entire appearance stream, which should be an XObject that
# is already registered. No need to add font resources.
rct = cast(RectangleObject, annotation[AA.Rect])
self._add_apstream_object(page, appearance_stream_obj, field, rct[0], rct[1])
elif (
parent_annotation.get(FA.FT) == "/Tx"
or parent_annotation.get(FA.FT) == "/Ch"
Expand All @@ -1050,7 +1054,7 @@ def update_page_form_field_values(
annotation[NameObject(AA.AP)] = DictionaryObject(
{NameObject("/N"): self._add_object(appearance_stream_obj)}
)
elif "/N" not in (ap:= cast(DictionaryObject, annotation[AA.AP])):
elif "/N" not in (ap := cast(DictionaryObject, annotation[AA.AP])):
cast(DictionaryObject, annotation[NameObject(AA.AP)])[
NameObject("/N")
] = self._add_object(appearance_stream_obj)
Expand Down Expand Up @@ -1100,9 +1104,9 @@ def reattach_fields(
if "/Annots" not in page:
return lst
annotations = cast(ArrayObject, page["/Annots"])
for idx, annotation in enumerate(annotations):
is_indirect = isinstance(annotation, IndirectObject)
annotation = cast(DictionaryObject, annotation.get_object())
for idx, annotation_ref in enumerate(annotations):
is_indirect = isinstance(annotation_ref, IndirectObject)
annotation = cast(DictionaryObject, annotation_ref.get_object())
if annotation.get("/Subtype", "") == "/Widget" and "/FT" in annotation:
if (
"indirect_reference" in annotation.__dict__
Expand Down Expand Up @@ -1472,9 +1476,10 @@ def _write_pdf_structure(self, stream: StreamType) -> tuple[list[int], list[int]
if obj is not None:
object_positions.append(stream.tell())
stream.write(f"{idnum} 0 obj\n".encode())
object_to_write = obj
if self._encryption and obj != self._encrypt_entry:
obj = self._encryption.encrypt_object(obj, idnum, 0)
obj.write_to_stream(stream)
object_to_write = self._encryption.encrypt_object(obj, idnum, 0)
object_to_write.write_to_stream(stream)
stream.write(b"\nendobj\n")
else:
object_positions.append(-1)
Expand Down Expand Up @@ -1563,9 +1568,8 @@ def add_metadata(self, infos: dict[str, Any]) -> None:
if isinstance(infos, PdfObject):
infos = cast(DictionaryObject, infos.get_object())
for key, value in list(infos.items()):
if isinstance(value, PdfObject):
value = value.get_object()
args[NameObject(key)] = create_string_object(str(value))
value_obj = value.get_object() if isinstance(value, PdfObject) else value
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
value_obj = value.get_object() if isinstance(value, PdfObject) else value
value_object = value.get_object() if isinstance(value, PdfObject) else value

args[NameObject(key)] = create_string_object(str(value_obj))
if self._info is None:
self._info = DictionaryObject()
self._info.update(args)
Expand Down
17 changes: 9 additions & 8 deletions pypdf/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -759,30 +759,31 @@ def decode_stream_data(stream: Any) -> bytes:
# If there is no data to decode, we should not try to decode it.
if not data:
return data
for filter_name, params in zip(filters, decode_parms):
if isinstance(params, NullObject):
params = {}
for filter_name, params_untyped in zip(filters, decode_parms):
params_typed: Optional[DictionaryObject] = None
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not reuse `params` directly to reduce the diff?

if not isinstance(params_untyped, NullObject):
params_typed = cast(Optional[DictionaryObject], params_untyped)
if filter_name in (FT.ASCII_HEX_DECODE, FTA.AHx):
data = ASCIIHexDecode.decode(data)
elif filter_name in (FT.ASCII_85_DECODE, FTA.A85):
data = ASCII85Decode.decode(data)
elif filter_name in (FT.LZW_DECODE, FTA.LZW):
data = LZWDecode.decode(data, params)
data = LZWDecode.decode(data, params_typed)
elif filter_name in (FT.FLATE_DECODE, FTA.FL):
data = FlateDecode.decode(data, params)
data = FlateDecode.decode(data, params_typed)
elif filter_name in (FT.RUN_LENGTH_DECODE, FTA.RL):
data = RunLengthDecode.decode(data)
elif filter_name == FT.CCITT_FAX_DECODE:
height = stream.get(IA.HEIGHT, ())
data = CCITTFaxDecode.decode(data, params, height)
data = CCITTFaxDecode.decode(data, params_typed, height)
elif filter_name == FT.DCT_DECODE:
data = DCTDecode.decode(data)
elif filter_name == FT.JPX_DECODE:
data = JPXDecode.decode(data)
elif filter_name == FT.JBIG2_DECODE:
data = JBIG2Decode.decode(data, params)
data = JBIG2Decode.decode(data, params_typed)
elif filter_name == "/Crypt":
if "/Name" in params or "/Type" in params:
if "/Name" in params_untyped or "/Type" in params_untyped:
raise NotImplementedError(
"/Crypt filter with /Name or /Type not supported yet"
)
Expand Down
6 changes: 3 additions & 3 deletions pypdf/generic/_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -394,8 +394,8 @@ def _load(cls, catalog: DictionaryObject) -> Generator[EmbeddedFile]:
for kid in cast(ArrayObject, container["/Kids"].get_object()):
# There might be further (nested) kids here.
# Wait for an example before evaluating an implementation.
kid = kid.get_object()
if "/Names" in kid:
yield from cls._load_from_names(cast(ArrayObject, kid["/Names"]))
kid_object = kid.get_object()
if "/Names" in kid_object:
yield from cls._load_from_names(cast(ArrayObject, kid_object["/Names"]))
if "/Names" in container:
yield from cls._load_from_names(cast(ArrayObject, container["/Names"]))
1 change: 0 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,6 @@ ignore = [
"PERF203", # `try`-`except` within a loop incurs performance overhead
"PGH003", # Use specific rule codes when ignoring type issues
"PLW1510", # `subprocess.run` without explicit `check` argument
"PLW2901", # `with` statement variable `img` overwritten by assignment target
"PT011", # `pytest.raises(ValueError)` is too broad, set the `match`
"PT012", # `pytest.raises()` block should contain a single simple statement
"PT014", # Ruff bug: Duplicate of test case at index 1 in `@pytest_mark.parametrize`
Expand Down
10 changes: 4 additions & 6 deletions tests/scripts/test_make_release.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ def test_strip_header(data, expected):
def test_get_git_commits_since_tag():
make_release = pytest.importorskip("make_release")

with open(COMMITS__VERSION_4_0_1, mode="rb") as commits, mock.patch(
"urllib.request.urlopen", side_effect=lambda _: commits
with open(COMMITS__VERSION_4_0_1, mode="rb") as commits_fh, mock.patch(
"urllib.request.urlopen", side_effect=lambda _: commits_fh
), mock.patch("subprocess.check_output", return_value=GIT_LOG__VERSION_4_0_1):
commits = make_release.get_git_commits_since_tag("4.0.1")
assert commits == [
Expand Down Expand Up @@ -87,8 +87,8 @@ def test_get_git_commits_since_tag():
def test_get_formatted_changes():
make_release = pytest.importorskip("make_release")

with open(COMMITS__VERSION_4_0_1, mode="rb") as commits, mock.patch(
"urllib.request.urlopen", side_effect=lambda _: commits
with open(COMMITS__VERSION_4_0_1, mode="rb") as commits_fh, mock.patch(
"urllib.request.urlopen", side_effect=lambda _: commits_fh
), mock.patch("subprocess.check_output", return_value=GIT_LOG__VERSION_4_0_1):
output, output_with_user = make_release.get_formatted_changes("4.0.1")

Expand Down Expand Up @@ -126,8 +126,6 @@ def test_get_formatted_changes():
- Avoid catching not emitted warnings (#2429) by @stefan6419846
"""
)


Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

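This is most likely wrong: removing these two blank lines leaves no blank-line separation before the next top-level test function.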

def test_get_formatted_changes__other():
make_release = pytest.importorskip("make_release")

Expand Down
4 changes: 2 additions & 2 deletions tests/test_filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,9 +82,9 @@ def test_flatedecode_unsupported_predictor():
predictors = (-10, -1, 0, 9, 16, 20, 100)

for predictor, s in cartesian_product(predictors, filter_inputs):
s = s.encode()
s_bytes = s.encode()
with pytest.raises(PdfReadError):
codec.decode(codec.encode(s), DictionaryObject({"/Predictor": predictor}))
codec.decode(codec.encode(s_bytes), DictionaryObject({"/Predictor": predictor}))


@pytest.mark.parametrize(
Expand Down
4 changes: 2 additions & 2 deletions tests/test_images.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@ def open_image(path: Union[Path, Image.Image, BytesIO]) -> Image.Image:
else:
if isinstance(path, Path):
assert path.exists()
with Image.open(path) as img:
with Image.open(path) as opened_img:
img = (
img.copy()
opened_img.copy()
) # Opened image should be copied to avoid issues with file closing
return img

Expand Down
Loading
Loading