88from io import BytesIO
99from itertools import product as cartesian_product
1010from pathlib import Path
11- from typing import cast
11+ from unittest . mock import patch
1212
1313import pytest
1414from PIL import Image , ImageOps
1515
1616from pypdf import PdfReader
17- from pypdf .constants import FilterTypeAbbreviations as FTA
18- from pypdf .constants import FilterTypes as FT
19- from pypdf .constants import StreamAttributes as SA
2017from pypdf .errors import DeprecationError , PdfReadError
2118from pypdf .filters import (
2219 ASCII85Decode ,
4845RESOURCE_ROOT = PROJECT_ROOT / "resources"
4946
5047
51- # Helper function for subprocess testing without brotli
52- def _run_script_without_brotli (tmp_path , script_content ) -> None :
53- env = os .environ .copy ()
54- env ["COVERAGE_PROCESS_START" ] = str (PROJECT_ROOT / "pyproject.toml" ) # Ensure coverage
55-
56- source_file = tmp_path / "script_no_brotli.py"
57- source_file .write_text (script_content )
58-
59- try :
60- env ["PYTHONPATH" ] = str (PROJECT_ROOT ) + os .pathsep + env ["PYTHONPATH" ]
61- except KeyError :
62- env ["PYTHONPATH" ] = str (PROJECT_ROOT )
63-
64- result = subprocess .run ( # noqa: S603
65- [shutil .which ("python" ), source_file ],
66- capture_output = True ,
67- env = env ,
68- cwd = PROJECT_ROOT , # Run from project root
69- )
70- # Check stderr for unexpected errors from the subprocess itself
71- if result .stderr :
72- pass # Print removed for committed code
73- assert result .returncode == 0 , f"Subprocess failed with exit code { result .returncode } "
74- # Allow specific stdout messages if needed, otherwise assert empty
75- # assert result.stdout == b"", "Subprocess produced unexpected stdout"
76- # Allow specific stderr messages if needed, otherwise assert empty
77- # assert result.stderr == b"", "Subprocess produced unexpected stderr"
7848
7949
8050@pytest .mark .parametrize (("predictor" , "s" ), list (cartesian_product ([1 ], filter_inputs )))
@@ -95,54 +65,42 @@ def test_brotli_decode_encode(s):
9565 assert encoded != s_bytes # Ensure encoding actually happened
9666 decoded = codec .decode (encoded )
9767 assert decoded == s_bytes
@patch("pypdf.filters.brotli", None)
def test_brotli_missing_installation_mocked():
    """Verify the Brotli code paths raise ImportError when brotli is unavailable.

    The ``patch`` decorator swaps the ``brotli`` attribute of ``pypdf.filters``
    for ``None`` while the test runs and restores the original value afterwards,
    even if an assertion fails.

    The module is deliberately NOT reloaded. ``importlib.reload`` would
    re-execute ``pypdf.filters``, which presumably re-runs its own
    ``import brotli`` and would overwrite the patched ``None`` whenever brotli
    is actually installed. A reload also rebinds every class in the module,
    which can break identity/``isinstance`` checks in unrelated tests.
    """
    import pypdf.filters
    from pypdf.generic import DictionaryObject, NameObject

    expected_message = "Brotli library not installed"
    codec = pypdf.filters.BrotliDecode()

    # Direct decode call.
    with pytest.raises(ImportError, match=expected_message):
        codec.decode(b"test data")

    # Direct encode call.
    with pytest.raises(ImportError, match=expected_message):
        codec.encode(b"test data")

    # Call routed through decode_stream_data via the /Filter entry.
    stream = DictionaryObject()
    stream[NameObject("/Filter")] = NameObject("/BrotliDecode")
    # The payload is never decompressed because the ImportError is raised first.
    stream._data = b"dummy compressed data"
    with pytest.raises(ImportError, match=expected_message):
        pypdf.filters.decode_stream_data(stream)
123100
124- def test_brotli_encode_without_brotli_installed_subprocess (tmp_path ):
125- """Verify BrotliDecode.encode raises ImportError via subprocess if brotli is not installed."""
126- script = """
127- import sys
128- import pytest
129- from pypdf.filters import BrotliDecode
130101
131- # Simulate brotli not being installed
132- sys.modules["brotli"] = None
133- # Need to reload filters to make the None effective inside the module
134- import importlib
135- import pypdf.filters
136- importlib.reload(pypdf.filters)
137102
138- codec = pypdf.filters.BrotliDecode()
139- with pytest.raises(ImportError) as exc_info:
140- codec.encode(b"test data")
141103
142- assert "Brotli library not installed. Required for BrotliDecode filter." in str(exc_info.value)
143- print("Test finished successfully: encode without brotli") # Add print to confirm script completion
144- """
145- _run_script_without_brotli (tmp_path , script )
146104
147105
148106def test_flatedecode_unsupported_predictor ():
@@ -312,7 +270,9 @@ class Pdf:
312270 def get_object (self , reference ) -> NumberObject :
313271 return NumberObject (42 )
314272
315- parameters = CCITTFaxDecode ._get_parameters (parameters = None , rows = IndirectObject (13 , 1 , Pdf ()))
273+ parameters = CCITTFaxDecode ._get_parameters (
274+ parameters = None , rows = IndirectObject (13 , 1 , Pdf ())
275+ )
316276 assert parameters .rows == 42
317277
318278
@@ -774,94 +734,24 @@ def test_flate_decode__not_rectangular(caplog):
774734 assert caplog .messages == ["Image data is not rectangular. Adding padding." ]
775735
776736
777- def test_main_decode_brotli_without_brotli_installed_subprocess (tmp_path ):
778- """Test decode_stream_data raises ImportError via subprocess if brotli is not installed."""
779- original_data = b"some data to be compressed with brotli"
780- # We need brotli here in the main process to create the test data
781- try :
782- import brotli
783737
784- compressed_data = brotli .compress (original_data )
785- except ImportError :
786- pytest .skip ("brotli library not installed in the main test environment" )
787738
788- script = f"""
789- import sys
790- import pytest
791- from pypdf import filters
792- from pypdf.generic import DictionaryObject, NameObject
793-
794- # Simulate brotli not being installed
795- sys.modules["brotli"] = None
796- # Need to reload filters to make the None effective inside the module
797- import importlib
798- import pypdf.filters
799- importlib.reload(pypdf.filters)
800-
801- # Simulate a stream dictionary indicating BrotliDecode
802- stream = DictionaryObject()
803- stream[NameObject("/Filter")] = NameObject("/BrotliDecode")
804- # Pass compressed data as hex to avoid encoding issues in the script string
805- stream._data = bytes.fromhex('{ compressed_data .hex ()} ')
806-
807- # Call the main decode function and expect an error
808- with pytest.raises(ImportError) as exc_info:
809- filters.decode_stream_data(stream)
810-
811- assert "Brotli library not installed. Required for BrotliDecode filter." in str(exc_info.value)
812- print("Test finished successfully: main decode without brotli") # Add print to confirm script completion
813- """
814- _run_script_without_brotli (tmp_path , script )
815-
816-
817- # Renamed from test_main_decode_brotli
def test_main_decode_brotli_installed():
    """Check that text extraction works on a Brotli-compressed PDF.

    The test is skipped when the ``brotli`` package cannot be imported.
    Any exception raised while reading or extracting is left to propagate,
    so pytest reports the original traceback instead of a wrapped message.
    """
    pytest.importorskip("brotli", reason="brotli library not installed")

    # Use the test PDF generated by resources/create_brotli_test_pdf.py
    pdf_path = RESOURCE_ROOT / "brotli-test-pdfs" / "minimal-brotli-compressed.pdf"

    reader = PdfReader(pdf_path)
    page = reader.pages[0]

    # Extracting text implicitly runs the page content through BrotliDecode.
    extracted_text = page.extract_text()

    # Verify the expected text content
    assert extracted_text.strip() == "Hello Brotli!"
0 commit comments