diff --git a/pypdf/_cmap.py b/pypdf/_cmap.py index 2b89a34fe2..599665d8cb 100644 --- a/pypdf/_cmap.py +++ b/pypdf/_cmap.py @@ -461,9 +461,9 @@ def build_font_width_map( m = 0 cpt = 0 for xx in w: - xx = xx.get_object() - if xx > 0: - m += xx + xx_value = xx.get_object() + if xx_value > 0: + m += xx_value cpt += 1 font_width_map["default"] = m / max(1, cpt) st = cast(int, ft["/FirstChar"]) diff --git a/pypdf/_doc_common.py b/pypdf/_doc_common.py index 9d92ebb0ea..995a9f667b 100644 --- a/pypdf/_doc_common.py +++ b/pypdf/_doc_common.py @@ -391,8 +391,8 @@ def recursive_call( return top, -1 return None, mi + ma for idx, kid in enumerate(cast(ArrayObject, node["/Kids"])): - kid = cast(DictionaryObject, kid.get_object()) - n, i = recursive_call(kid, mi) + kid_object = cast(DictionaryObject, kid.get_object()) + n, i = recursive_call(kid_object, mi) if n is not None: # page has just been found ... if i < 0: # ... just below! return node, idx @@ -612,8 +612,8 @@ def _build_field( states: list[str] = [] retval[key][NameObject("/_States_")] = ArrayObject(states) for k in obj.get(FA.Kids, {}): - k = k.get_object() - for s in list(k["/AP"]["/N"].keys()): + k_object = k.get_object() + for s in list(k_object["/AP"]["/N"].keys()): if s not in states: states.append(s) retval[key][NameObject("/_States_")] = ArrayObject(states) @@ -641,8 +641,8 @@ def _check_kids( if PagesAttributes.KIDS in tree: # recurse down the tree for kid in tree[PagesAttributes.KIDS]: # type: ignore - kid = kid.get_object() - self.get_fields(kid, retval, fileobj, stack) + kid_object = kid.get_object() + self.get_fields(kid_object, retval, fileobj, stack) def _write_field(self, fileobj: Any, field: Any, field_attributes: Any) -> None: field_attributes_tuple = FA.attributes() @@ -771,16 +771,16 @@ def _get_inherited(obj: DictionaryObject, key: str) -> Any: else: kids = field.get("/Kids", ()) for k in kids: - k = k.get_object() - if (k.get("/Subtype", "") == "/Widget") and ("/T" not in k): + k_object = k.get_object() + if (k_object.get("/Subtype", "") == "/Widget") and ("/T" not in k_object): # Kid that is just a widget, not a field: - if "/P" in k: - ret += [k["/P"].get_object()] + if "/P" in k_object: + ret += [k_object["/P"].get_object()] else: ret += [ p for p in self.pages - if k.indirect_reference in p.get("/Annots", "") + if k_object.indirect_reference in p.get("/Annots", "") ] return [ x @@ -1317,9 +1317,9 @@ def xfa(self) -> Optional[dict[str, Any]]: i = iter(fields) for f in i: tag = f - f = next(i) - if isinstance(f, IndirectObject): - field = cast(Optional[EncodedStreamObject], f.get_object()) + field_value = next(i) + if isinstance(field_value, IndirectObject): + field = cast(Optional[EncodedStreamObject], field_value.get_object()) if field: es = zlib.decompress(field._data) retval[tag] = es diff --git a/pypdf/_page.py b/pypdf/_page.py index c040180942..0a3923d5a5 100644 --- a/pypdf/_page.py +++ b/pypdf/_page.py @@ -740,14 +740,17 @@ def _get_inline_images(self) -> dict[str, ImageFile]: if k in {"/Length", "/L"}: # no length is expected continue if isinstance(v, list): - v = ArrayObject( - [self._translate_value_inline_image(k, x) for x in v] + value_for_init = cast( + PdfObject, + ArrayObject( + [self._translate_value_inline_image(k, x) for x in v] + ), ) else: - v = self._translate_value_inline_image(k, v) - k = NameObject(_INLINE_IMAGE_KEY_MAPPING[k]) - if k not in init: - init[k] = v + value_for_init = self._translate_value_inline_image(k, v) + mapped_k = NameObject(_INLINE_IMAGE_KEY_MAPPING[k]) + if mapped_k not in init: + init[mapped_k] = value_for_init ii["object"] = EncodedStreamObject.initialize_from_dictionary(init) extension, byte_stream, img = _xobj_to_image(ii["object"]) files[f"~{num}~"] = ImageFile( @@ -1231,13 +1234,13 @@ def _merge_page_writer( else: trsf = Transformation(ctm) for a in cast(ArrayObject, page2[PG.ANNOTS]): - a = a.get_object() - aa = a.clone( + annotation_object = a.get_object() + aa = annotation_object.clone( pdf, ignore_fields=("/P", "/StructParent", "/Parent"), force_duplicate=True, ) - r = cast(ArrayObject, a["/Rect"]) + r = cast(ArrayObject, annotation_object["/Rect"]) pt1 = trsf.apply_on((r[0], r[1]), True) pt2 = trsf.apply_on((r[2], r[3]), True) aa[NameObject("/Rect")] = ArrayObject( @@ -1248,8 +1251,8 @@ def _merge_page_writer( max(pt1[1], pt2[1]), ) ) - if "/QuadPoints" in a: - q = cast(ArrayObject, a["/QuadPoints"]) + if "/QuadPoints" in annotation_object: + q = cast(ArrayObject, annotation_object["/QuadPoints"]) aa[NameObject("/QuadPoints")] = ArrayObject( trsf.apply_on((q[0], q[1]), True) + trsf.apply_on((q[2], q[3]), True) diff --git a/pypdf/_text_extraction/_layout_mode/_fixed_width_page.py b/pypdf/_text_extraction/_layout_mode/_fixed_width_page.py index 43d6f15654..179f029b88 100644 --- a/pypdf/_text_extraction/_layout_mode/_fixed_width_page.py +++ b/pypdf/_text_extraction/_layout_mode/_fixed_width_page.py @@ -186,13 +186,14 @@ def recurs_to_target_op( tj_ops.append(text_state_mgr.text_state_params(operands[2])) elif op in (b"Td", b"Tm", b"TD", b"T*"): text_state_mgr.reset_trm() + operands_for_tm = operands if op == b"Tm": text_state_mgr.reset_tm() elif op == b"TD": text_state_mgr.set_state_param(b"TL", -operands[1]) elif op == b"T*": - operands = [0, -text_state_mgr.TL] - text_state_mgr.add_tm(operands) + operands_for_tm = [0, -text_state_mgr.TL] + text_state_mgr.add_tm(operands_for_tm) elif op == b"Tf": text_state_mgr.set_font(fonts[operands[0]], operands[1]) else: # handle Tc, Tw, Tz, TL, and Ts operators diff --git a/pypdf/_text_extraction/_layout_mode/_font.py b/pypdf/_text_extraction/_layout_mode/_font.py index 3b6cd86ac7..daf03aa247 100644 --- a/pypdf/_text_extraction/_layout_mode/_font.py +++ b/pypdf/_text_extraction/_layout_mode/_font.py @@ -64,9 +64,12 @@ def __post_init__(self) -> None: for d_font_idx, d_font in enumerate( self.font_dictionary["/DescendantFonts"] ): - while isinstance(d_font, IndirectObject): - d_font = d_font.get_object() - self.font_dictionary["/DescendantFonts"][d_font_idx] = d_font + if isinstance(d_font, IndirectObject): + d_font_object = d_font.get_object() + else: + d_font_object = d_font + assert not isinstance(d_font_object, IndirectObject), d_font_object + self.font_dictionary["/DescendantFonts"][d_font_idx] = d_font_object ord_map = { ord(_target): _surrogate for _target, _surrogate in self.char_map.items() @@ -80,18 +83,18 @@ def __post_init__(self) -> None: skip_count = 0 _w = d_font.get("/W", []) for idx, w_entry in enumerate(_w): - w_entry = w_entry.get_object() + w_value = w_entry.get_object() if skip_count: skip_count -= 1 continue - if not isinstance(w_entry, (int, float)): # pragma: no cover + if not isinstance(w_value, (int, float)): # pragma: no cover # We should never get here due to skip_count above. Add a # warning and or use reader's "strict" to force an ex??? continue # check for format (1): `int [int int int int ...]` w_next_entry = _w[idx + 1].get_object() if isinstance(w_next_entry, Sequence): - start_idx, width_list = w_entry, w_next_entry + start_idx, width_list = w_value, w_next_entry self.width_map.update( { ord_map[_cidx]: _width @@ -112,7 +115,7 @@ def __post_init__(self) -> None: _w[idx + 2].get_object(), (int, float) ): start_idx, stop_idx, const_width = ( - w_entry, + w_value, w_next_entry, _w[idx + 2].get_object(), ) diff --git a/pypdf/_writer.py b/pypdf/_writer.py index b4dd9db145..674b985d4d 100644 --- a/pypdf/_writer.py +++ b/pypdf/_writer.py @@ -978,8 +978,8 @@ def update_page_form_field_values( if PG.ANNOTS not in page: logger_warning("No fields to update on this page", __name__) return - for annotation in page[PG.ANNOTS]: # type: ignore - annotation = cast(DictionaryObject, annotation.get_object()) + for annotation_ref in page[PG.ANNOTS]: # type: ignore + annotation = cast(DictionaryObject, annotation_ref.get_object()) if annotation.get("/Subtype", "") != "/Widget": continue if "/FT" in annotation and "/T" in annotation: @@ -1003,7 +1003,6 @@ def update_page_form_field_values( del parent_annotation["/I"] if flags: annotation[NameObject(FA.Ff)] = NumberObject(flags) - # Set the field value if not (value is None and flatten): # Only change values if given by user and not flattening. if isinstance(value, list): lst = ArrayObject(TextStringObject(v) for v in value) @@ -1025,9 +1024,14 @@ def update_page_form_field_values( if v not in normal_ap: v = NameObject("/Off") appearance_stream_obj = normal_ap.get(v) - # Other cases will be updated through the for loop + # other cases will be updated through the for loop annotation[NameObject(AA.AS)] = v annotation[NameObject(FA.V)] = v + if flatten and appearance_stream_obj is not None: + # We basically copy the entire appearance stream, which should be an XObject that + # is already registered. No need to add font resources. + rct = cast(RectangleObject, annotation[AA.Rect]) + self._add_apstream_object(page, appearance_stream_obj, field, rct[0], rct[1]) elif ( parent_annotation.get(FA.FT) == "/Tx" or parent_annotation.get(FA.FT) == "/Ch" @@ -1046,7 +1050,7 @@ def update_page_form_field_values( annotation[NameObject(AA.AP)] = DictionaryObject( {NameObject("/N"): self._add_object(appearance_stream_obj)} ) - elif "/N" not in (ap:= cast(DictionaryObject, annotation[AA.AP])): + elif "/N" not in (ap := cast(DictionaryObject, annotation[AA.AP])): cast(DictionaryObject, annotation[NameObject(AA.AP)])[ NameObject("/N") ] = self._add_object(appearance_stream_obj) @@ -1096,9 +1100,9 @@ def reattach_fields( if "/Annots" not in page: return lst annotations = cast(ArrayObject, page["/Annots"]) - for idx, annotation in enumerate(annotations): - is_indirect = isinstance(annotation, IndirectObject) - annotation = cast(DictionaryObject, annotation.get_object()) + for idx, annotation_ref in enumerate(annotations): + is_indirect = isinstance(annotation_ref, IndirectObject) + annotation = cast(DictionaryObject, annotation_ref.get_object()) if annotation.get("/Subtype", "") == "/Widget" and "/FT" in annotation: if ( "indirect_reference" in annotation.__dict__ @@ -1468,9 +1472,10 @@ def _write_pdf_structure(self, stream: StreamType) -> tuple[list[int], list[int] if obj is not None: object_positions.append(stream.tell()) stream.write(f"{idnum} 0 obj\n".encode()) + object_to_write = obj if self._encryption and obj != self._encrypt_entry: - obj = self._encryption.encrypt_object(obj, idnum, 0) - obj.write_to_stream(stream) + object_to_write = self._encryption.encrypt_object(obj, idnum, 0) + object_to_write.write_to_stream(stream) stream.write(b"\nendobj\n") else: object_positions.append(-1) @@ -1559,9 +1564,8 @@ def add_metadata(self, infos: dict[str, Any]) -> None: if isinstance(infos, PdfObject): infos = cast(DictionaryObject, infos.get_object()) for key, value in list(infos.items()): - if isinstance(value, PdfObject): - value = value.get_object() - args[NameObject(key)] = create_string_object(str(value)) + value_obj = value.get_object() if isinstance(value, PdfObject) else value + args[NameObject(key)] = create_string_object(str(value_obj)) if self._info is None: self._info = DictionaryObject() self._info.update(args) diff --git a/pypdf/filters.py b/pypdf/filters.py index a3f87ad401..9b1a958683 100644 --- a/pypdf/filters.py +++ b/pypdf/filters.py @@ -759,30 +759,31 @@ def decode_stream_data(stream: Any) -> bytes: # If there is no data to decode, we should not try to decode it. if not data: return data - for filter_name, params in zip(filters, decode_parms): - if isinstance(params, NullObject): - params = {} + for filter_name, params_untyped in zip(filters, decode_parms): + params_typed: Optional[DictionaryObject] = None + if not isinstance(params_untyped, NullObject): + params_typed = cast(Optional[DictionaryObject], params_untyped) if filter_name in (FT.ASCII_HEX_DECODE, FTA.AHx): data = ASCIIHexDecode.decode(data) elif filter_name in (FT.ASCII_85_DECODE, FTA.A85): data = ASCII85Decode.decode(data) elif filter_name in (FT.LZW_DECODE, FTA.LZW): - data = LZWDecode.decode(data, params) + data = LZWDecode.decode(data, params_typed) elif filter_name in (FT.FLATE_DECODE, FTA.FL): - data = FlateDecode.decode(data, params) + data = FlateDecode.decode(data, params_typed) elif filter_name in (FT.RUN_LENGTH_DECODE, FTA.RL): data = RunLengthDecode.decode(data) elif filter_name == FT.CCITT_FAX_DECODE: height = stream.get(IA.HEIGHT, ()) - data = CCITTFaxDecode.decode(data, params, height) + data = CCITTFaxDecode.decode(data, params_typed, height) elif filter_name == FT.DCT_DECODE: data = DCTDecode.decode(data) elif filter_name == FT.JPX_DECODE: data = JPXDecode.decode(data) elif filter_name == FT.JBIG2_DECODE: - data = JBIG2Decode.decode(data, params) + data = JBIG2Decode.decode(data, params_typed) elif filter_name == "/Crypt": - if "/Name" in params or "/Type" in params: + if "/Name" in params_untyped or "/Type" in params_untyped: raise NotImplementedError( "/Crypt filter with /Name or /Type not supported yet" ) diff --git a/pypdf/generic/_files.py b/pypdf/generic/_files.py index f29fa770f6..8a6e8b5469 100644 --- a/pypdf/generic/_files.py +++ b/pypdf/generic/_files.py @@ -394,8 +394,8 @@ def _load(cls, catalog: DictionaryObject) -> Generator[EmbeddedFile]: for kid in cast(ArrayObject, container["/Kids"].get_object()): # There might be further (nested) kids here. # Wait for an example before evaluating an implementation. - kid = kid.get_object() - if "/Names" in kid: - yield from cls._load_from_names(cast(ArrayObject, kid["/Names"])) + kid_object = kid.get_object() + if "/Names" in kid_object: + yield from cls._load_from_names(cast(ArrayObject, kid_object["/Names"])) if "/Names" in container: yield from cls._load_from_names(cast(ArrayObject, container["/Names"])) diff --git a/pyproject.toml b/pyproject.toml index f0ba519cf4..462d20c388 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -173,7 +173,6 @@ ignore = [ "PERF203", # `try`-`except` within a loop incurs performance overhead "PGH003", # Use specific rule codes when ignoring type issues "PLW1510", # `subprocess.run` without explicit `check` argument - "PLW2901", # `with` statement variable `img` overwritten by assignment target "PT011", # `pytest.raises(ValueError)` is too broad, set the `match` "PT012", # `pytest.raises()` block should contain a single simple statement "PT014", # Ruff bug: Duplicate of test case at index 1 in `@pytest_mark.parametrize` diff --git a/tests/scripts/test_make_release.py b/tests/scripts/test_make_release.py index 16cb2e01ee..7371e2547f 100644 --- a/tests/scripts/test_make_release.py +++ b/tests/scripts/test_make_release.py @@ -41,8 +41,8 @@ def test_strip_header(data, expected): def test_get_git_commits_since_tag(): make_release = pytest.importorskip("make_release") - with open(COMMITS__VERSION_4_0_1, mode="rb") as commits, mock.patch( - "urllib.request.urlopen", side_effect=lambda _: commits + with open(COMMITS__VERSION_4_0_1, mode="rb") as commits_fh, mock.patch( + "urllib.request.urlopen", side_effect=lambda _: commits_fh ), mock.patch("subprocess.check_output", return_value=GIT_LOG__VERSION_4_0_1): commits = make_release.get_git_commits_since_tag("4.0.1") assert commits == [ @@ -87,8 +87,8 @@ def test_get_git_commits_since_tag(): def test_get_formatted_changes(): make_release = pytest.importorskip("make_release") - with open(COMMITS__VERSION_4_0_1, mode="rb") as commits, mock.patch( - "urllib.request.urlopen", side_effect=lambda _: commits + with open(COMMITS__VERSION_4_0_1, mode="rb") as commits_fh, mock.patch( + "urllib.request.urlopen", side_effect=lambda _: commits_fh ), mock.patch("subprocess.check_output", return_value=GIT_LOG__VERSION_4_0_1): output, output_with_user = make_release.get_formatted_changes("4.0.1") @@ -126,8 +126,6 @@ def test_get_formatted_changes(): - Avoid catching not emitted warnings (#2429) by @stefan6419846 """ ) - - def test_get_formatted_changes__other(): make_release = pytest.importorskip("make_release") diff --git a/tests/test_filters.py b/tests/test_filters.py index a79e0ea217..b6b3468deb 100644 --- a/tests/test_filters.py +++ b/tests/test_filters.py @@ -82,9 +82,9 @@ def test_flatedecode_unsupported_predictor(): predictors = (-10, -1, 0, 9, 16, 20, 100) for predictor, s in cartesian_product(predictors, filter_inputs): - s = s.encode() + s_bytes = s.encode() with pytest.raises(PdfReadError): - codec.decode(codec.encode(s), DictionaryObject({"/Predictor": predictor})) + codec.decode(codec.encode(s_bytes), DictionaryObject({"/Predictor": predictor})) @pytest.mark.parametrize( diff --git a/tests/test_images.py b/tests/test_images.py index dd4ccebefb..f8a48c910b 100644 --- a/tests/test_images.py +++ b/tests/test_images.py @@ -32,9 +32,9 @@ def open_image(path: Union[Path, Image.Image, BytesIO]) -> Image.Image: else: if isinstance(path, Path): assert path.exists() - with Image.open(path) as img: + with Image.open(path) as opened_img: img = ( - img.copy() + opened_img.copy() ) # Opened image should be copied to avoid issues with file closing return img diff --git a/tests/test_writer.py b/tests/test_writer.py index c76f76a443..192c52f450 100644 --- a/tests/test_writer.py +++ b/tests/test_writer.py @@ -2422,10 +2422,10 @@ def test_no_resource_for_14_std_fonts(caplog): writer = PdfWriter(BytesIO(get_data_from_url(url, name=name))) p = writer.pages[0] for a in p["/Annots"]: - a = a.get_object() - if a["/FT"] == "/Tx": + a_obj = a.get_object() + if a_obj["/FT"] == "/Tx": writer.update_page_form_field_values( - p, {a["/T"]: "Brooks"}, auto_regenerate=False + p, {a_obj["/T"]: "Brooks"}, auto_regenerate=False ) assert "Font dictionary for /Helvetica not found." in caplog.text