@@ -214,47 +214,64 @@ def test_image_extraction(src, page_index, image_key, expected):
214214 assert image_similarity (BytesIO (actual_image .data ), expected ) >= 0.99
215215
216216
217- @pytest .mark .parametrize (
218- ("src" , "page_index" , "image_key" , "expected" ),
219- [
220- (
221- SAMPLE_ROOT / "027-onlyoffice-image/Patterns.pdf" ,
222- 0 ,
223- "/Pattern/P1/X1" ,
224- SAMPLE_ROOT / "027-onlyoffice-image/P1_X1.jpg" ,
225- ),
226- (
227- SAMPLE_ROOT / "027-onlyoffice-image/Patterns.pdf" ,
228- 0 ,
229- "/Pattern/P2/X1" ,
230- SAMPLE_ROOT / "027-onlyoffice-image/P2_X1.jpg" ,
231- ),
232- (
233- SAMPLE_ROOT / "027-onlyoffice-image/Patterns.pdf" ,
234- 0 ,
235- "/Pattern/P3/X1" ,
236- SAMPLE_ROOT / "027-onlyoffice-image/P3_X1.jpg" ,
237- ),
238- ],
239- ids = [
240- "027-onlyoffice-image/P1_X1.jpg" ,
241- "027-onlyoffice-image/P2_X1.jpg" ,
242- "027-onlyoffice-image/P3_X1.jpg" ,
243- ],
244- )
245- @pytest .mark .samples ()
246- def test_patterns_image_extraction (src , page_index , image_key , expected ):
247- reader = PdfReader (src )
248- extractedIDs = reader .pages [page_index ].images
@pytest.mark.enable_socket()
def test_onlyoffice_standard_images_extraction():
    """Check pattern-image extraction on "iss2613-onlyoffice-standardImages.pdf".

    The first page must list exactly three pattern images, and each one must
    reach an ``image_similarity`` score of at least 0.99 against its
    reference JPEG obtained through ``get_data_from_url``.
    """
    pdf_bytes = get_data_from_url(name="iss2613-onlyoffice-standardImages.pdf")
    reader = PdfReader(BytesIO(pdf_bytes))

    listing = str(reader.pages[0].images)
    assert (
        listing
        == "[Image_0=/Pattern/P1/X1, Image_1=/Pattern/P2/X1, Image_2=/Pattern/P3/X1]"
    )

    # One row per reference picture:
    # (index in the page's image list, reference URL, name given to get_data_from_url)
    references = [
        (
            0,
            "https://github.com/py-pdf/pypdf/assets/67143274/cc28b39b-2e96-4bd3-b33c-c545c5cec2d9",
            "iss2613-P1_X1.jpg",
        ),
        (
            1,
            "https://github.com/py-pdf/pypdf/assets/67143274/827c9066-546a-4502-a613-579ec25c598e",
            "iss2613-P2_X1.jpg",
        ),
        (
            2,
            "https://github.com/py-pdf/pypdf/assets/67143274/df9cb9e9-e589-4d2e-a537-ae0fe3240bbd",
            "iss2613-P3_X1.jpg",
        ),
    ]

    for position, url, name in references:
        # Fetch the reference first, then extract and compare, in list order.
        expected = Image.open(BytesIO(get_data_from_url(url, name=name)))
        extracted = reader.pages[0].images[position].image
        assert image_similarity(extracted, expected) >= 0.99
245+
246+
# Marked ``enable_socket`` (not ``samples``): every resource used here is
# obtained through ``get_data_from_url`` with remote URLs rather than from a
# local sample file.
@pytest.mark.enable_socket()
def test_onlyoffice_form_images_extraction():
    """Check pattern-image extraction on "iss2613-onlyoffice-form.pdf".

    Page 0 must list two pattern images and page 1 a single one. Each
    extracted image must reach an ``image_similarity`` score of at least 0.99
    against its reference JPEG obtained through ``get_data_from_url``.
    """
    reader = PdfReader(BytesIO(get_data_from_url(name="iss2613-onlyoffice-form.pdf")))

    assert (
        str(reader.pages[0].images)
        == "[Image_0=/Pattern/P1/X1, Image_1=/Pattern/P2/X1]"
    )

    assert str(reader.pages[1].images) == "[Image_0=/Pattern/P1/X1]"

    # One row per reference picture:
    # (page index, index in that page's image list, reference URL,
    #  name given to get_data_from_url)
    references = [
        (
            0,
            0,
            "https://github.com/py-pdf/pypdf/assets/67143274/cc28b39b-2e96-4bd3-b33c-c545c5cec2d9",
            "iss2613-P1_X1.jpg",
        ),
        (
            0,
            1,
            "https://github.com/py-pdf/pypdf/assets/67143274/827c9066-546a-4502-a613-579ec25c598e",
            "iss2613-P2_X1.jpg",
        ),
        # The only image on page 1 is keyed /Pattern/P1/X1 within that page,
        # but it is compared against the P3_X1 reference picture.
        (
            1,
            0,
            "https://github.com/py-pdf/pypdf/assets/67143274/df9cb9e9-e589-4d2e-a537-ae0fe3240bbd",
            "iss2613-P3_X1.jpg",
        ),
    ]

    for page_index, image_index, url, name in references:
        expected = Image.open(BytesIO(get_data_from_url(url, name=name)))
        extracted = reader.pages[page_index].images[image_index].image
        assert image_similarity(extracted, expected) >= 0.99
258275
259276
260277@pytest .mark .enable_socket ()
0 commit comments