88from io import BytesIO
99from itertools import product as cartesian_product
1010from pathlib import Path
11- from typing import cast
11+ from unittest . mock import patch
1212
1313import pytest
1414from PIL import Image , ImageOps
1515
1616from pypdf import PdfReader
17- from pypdf .constants import FilterTypeAbbreviations as FTA
18- from pypdf .constants import FilterTypes as FT
19- from pypdf .constants import StreamAttributes as SA
2017from pypdf .errors import DeprecationError , PdfReadError
2118from pypdf .filters import (
2219 ASCII85Decode ,
4845RESOURCE_ROOT = PROJECT_ROOT / "resources"
4946
5047
51- # Helper function for subprocess testing without brotli
52- def _run_script_without_brotli (tmp_path , script_content ) -> None :
53- env = os .environ .copy ()
54- env ["COVERAGE_PROCESS_START" ] = str (PROJECT_ROOT / "pyproject.toml" ) # Ensure coverage
55-
56- source_file = tmp_path / "script_no_brotli.py"
57- source_file .write_text (script_content )
58-
59- try :
60- env ["PYTHONPATH" ] = str (PROJECT_ROOT ) + os .pathsep + env ["PYTHONPATH" ]
61- except KeyError :
62- env ["PYTHONPATH" ] = str (PROJECT_ROOT )
63-
64- result = subprocess .run ( # noqa: S603
65- [shutil .which ("python" ), source_file ],
66- capture_output = True ,
67- env = env ,
68- cwd = PROJECT_ROOT , # Run from project root
69- )
70- # Check stderr for unexpected errors from the subprocess itself
71- if result .stderr :
72- pass # Print removed for committed code
73- assert result .returncode == 0 , f"Subprocess failed with exit code { result .returncode } "
74- # Allow specific stdout messages if needed, otherwise assert empty
75- # assert result.stdout == b"", "Subprocess produced unexpected stdout"
76- # Allow specific stderr messages if needed, otherwise assert empty
77- # assert result.stderr == b"", "Subprocess produced unexpected stderr"
7848
7949
8050@pytest .mark .parametrize (("predictor" , "s" ), list (cartesian_product ([1 ], filter_inputs )))
@@ -95,54 +65,38 @@ def test_brotli_decode_encode(s):
9565 assert encoded != s_bytes # Ensure encoding actually happened
9666 decoded = codec .decode (encoded )
9767 assert decoded == s_bytes
@patch("pypdf.filters.brotli", None)
def test_brotli_missing_installation_mocked():
    """
    Verify the Brotli code paths raise ImportError when brotli is unavailable.

    The missing package is simulated by replacing the ``brotli`` name inside
    ``pypdf.filters`` with ``None`` for the duration of the test (see the
    decorator). Patching ``sys.modules`` is not used: ``pypdf.filters`` is
    already imported by the time this test runs, so a later ``import`` would
    not re-execute the module and the patch would have no effect on it.

    Three entry points are checked: ``BrotliDecode.decode``,
    ``BrotliDecode.encode`` and ``decode_stream_data`` for a stream whose
    ``/Filter`` is ``/BrotliDecode``.
    """
    # Local imports keep the test self-contained; the functions still look up
    # ``brotli`` in the (patched) ``pypdf.filters`` module namespace.
    from pypdf.filters import BrotliDecode, decode_stream_data
    from pypdf.generic import DictionaryObject, NameObject

    expected_message = "Brotli library not installed"
    codec = BrotliDecode()

    # Direct decode call
    with pytest.raises(ImportError, match=expected_message):
        codec.decode(b"test data")

    # Direct encode call
    with pytest.raises(ImportError, match=expected_message):
        codec.encode(b"test data")

    # Call via decode_stream_data; the payload is never decompressed because
    # the ImportError is expected before any decoding happens.
    stream = DictionaryObject()
    stream[NameObject("/Filter")] = NameObject("/BrotliDecode")
    stream._data = b"dummy compressed data"
    with pytest.raises(ImportError, match=expected_message):
        decode_stream_data(stream)
11796
118- assert "Brotli library not installed. Required for BrotliDecode filter." in str(exc_info.value)
119- print("Test finished successfully: decode without brotli") # Add print to confirm script completion
120- """
121- _run_script_without_brotli (tmp_path , script )
12297
12398
124- def test_brotli_encode_without_brotli_installed_subprocess (tmp_path ):
125- """Verify BrotliDecode.encode raises ImportError via subprocess if brotli is not installed."""
126- script = """
127- import sys
128- import pytest
129- from pypdf.filters import BrotliDecode
130-
131- # Simulate brotli not being installed
132- sys.modules["brotli"] = None
133- # Need to reload filters to make the None effective inside the module
134- import importlib
135- import pypdf.filters
136- importlib.reload(pypdf.filters)
13799
138- codec = pypdf.filters.BrotliDecode()
139- with pytest.raises(ImportError) as exc_info:
140- codec.encode(b"test data")
141-
142- assert "Brotli library not installed. Required for BrotliDecode filter." in str(exc_info.value)
143- print("Test finished successfully: encode without brotli") # Add print to confirm script completion
144- """
145- _run_script_without_brotli (tmp_path , script )
146100
147101
148102def test_flatedecode_unsupported_predictor ():
@@ -312,7 +266,9 @@ class Pdf:
312266 def get_object (self , reference ) -> NumberObject :
313267 return NumberObject (42 )
314268
315- parameters = CCITTFaxDecode ._get_parameters (parameters = None , rows = IndirectObject (13 , 1 , Pdf ()))
269+ parameters = CCITTFaxDecode ._get_parameters (
270+ parameters = None , rows = IndirectObject (13 , 1 , Pdf ())
271+ )
316272 assert parameters .rows == 42
317273
318274
@@ -774,94 +730,24 @@ def test_flate_decode__not_rectangular(caplog):
774730 assert caplog .messages == ["Image data is not rectangular. Adding padding." ]
775731
776732
777- def test_main_decode_brotli_without_brotli_installed_subprocess (tmp_path ):
778- """Test decode_stream_data raises ImportError via subprocess if brotli is not installed."""
779- original_data = b"some data to be compressed with brotli"
780- # We need brotli here in the main process to create the test data
781- try :
782- import brotli
783733
784- compressed_data = brotli .compress (original_data )
785- except ImportError :
786- pytest .skip ("brotli library not installed in the main test environment" )
787734
788- script = f"""
789- import sys
790- import pytest
791- from pypdf import filters
792- from pypdf.generic import DictionaryObject, NameObject
793-
794- # Simulate brotli not being installed
795- sys.modules["brotli"] = None
796- # Need to reload filters to make the None effective inside the module
797- import importlib
798- import pypdf.filters
799- importlib.reload(pypdf.filters)
800-
801- # Simulate a stream dictionary indicating BrotliDecode
802- stream = DictionaryObject()
803- stream[NameObject("/Filter")] = NameObject("/BrotliDecode")
804- # Pass compressed data as hex to avoid encoding issues in the script string
805- stream._data = bytes.fromhex('{ compressed_data .hex ()} ')
806-
807- # Call the main decode function and expect an error
808- with pytest.raises(ImportError) as exc_info:
809- filters.decode_stream_data(stream)
810-
811- assert "Brotli library not installed. Required for BrotliDecode filter." in str(exc_info.value)
812- print("Test finished successfully: main decode without brotli") # Add print to confirm script completion
813- """
814- _run_script_without_brotli (tmp_path , script )
815-
816-
817- # Renamed from test_main_decode_brotli
def test_main_decode_brotli_installed():
    """
    Test text extraction from a real PDF whose content uses the Brotli filter.

    The test is skipped when the ``brotli`` package cannot be imported.
    Any exception raised while reading or extracting is left to propagate so
    pytest reports the original traceback instead of a flattened message.
    """
    pytest.importorskip("brotli", reason="brotli library not installed")

    # Use the test PDF generated by resources/create_brotli_test_pdf.py
    pdf_path = RESOURCE_ROOT / "brotli-test-pdfs" / "minimal-brotli-compressed.pdf"

    reader = PdfReader(pdf_path)
    page = reader.pages[0]

    # Extracting text implicitly runs the BrotliDecode filter on the content stream
    extracted_text = page.extract_text()

    # Verify the expected text content
    assert extracted_text.strip() == "Hello, Brotli!"
0 commit comments