Skip to content

Commit 9770d4b

Browse files
committed
Simplify test expectation
Signed-off-by: Philippe Ombredanne <[email protected]>
1 parent 435c92b commit 9770d4b

File tree

1 file changed

+2
-250
lines changed

1 file changed

+2
-250
lines changed

tests/textcode/test_pdf.py

Lines changed: 2 additions & 250 deletions
Original file line numberDiff line numberDiff line change
@@ -80,9 +80,8 @@ def test_get_text_lines_can_parse_faulty_broadcom_doc(self):
8080
def test_pdfminer_can_parse_apache_fop_test_pdf(self):
8181
test_file = self.get_test_loc('pdf/fop_test_pdf_1.5_test.pdf')
8282
result = pdf.get_text_lines(test_file)
83-
if result == apache_fop_expected:
84-
return
85-
assert result == apache_fop_expected_2020
83+
for expected in apache_fop_expected:
84+
assert expected in result
8685

8786
def test_numbered_text_lines_does_not_fail_on_autocad_test_pdf(self):
8887
test_file = self.get_test_loc('pdf/AutoCad_Diagram.pdf')
@@ -92,264 +91,17 @@ def test_numbered_text_lines_does_not_fail_on_autocad_test_pdf(self):
9291

9392
apache_fop_expected = [
9493
b'This is the page header\n',
95-
b'(There\xe2\x80\x99s another page se-\n',
96-
b'quence below.)\n',
97-
b'\n',
9894
b'About Apache FOP\n',
9995
b'It is a print formatter driv-\n',
10096
b'en by XSL formatting ob-\n',
10197
b'jects (XSL-FO) and an out-\n',
102-
b'put \n',
103-
b'format-\n',
104-
b'\n',
105-
b'independent \n',
106-
b'\n',
107-
b'Page 1\n',
108-
b'ter1. FOP has a nice logo:\n',
109-
b'\n',
110-
b'Header 1.1 Header 1.2\n',
111-
b'Cell 1.1\n',
112-
b'\n',
113-
b'Cell 1.2\n',
114-
b'\n',
115-
b'See the FOP website for more information\n',
116-
b'\n',
117-
b'\x0cThis is the page header\n',
118-
b'\n',
119-
b'Header 1.1 Header 1.2\n',
120-
b'Cell 2.1\n',
121-
b'\n',
122-
b'Cell 2.2\n',
123-
b'\n',
124-
b'Page 2\n',
12598
b'(XSL-FO) and an output in-\n',
12699
b'dependent formatter. It is a\n',
127100
b'Java application that reads\n',
128-
b'a formatting object (FO)\n',
129-
b'tree and renders the res-\n',
130-
b'ulting pages to a specified\n',
131-
b'output.\n',
132-
b'\n',
133-
b'Apache FOP (Formatting\n',
134-
b'Objects Processor) is a\n',
135-
b'print formatter driven by\n',
136-
b'XSL \n',
137-
b'formatting objects\n',
138-
b'This fo:block element spans all the columns of the docu-\n',
139-
b'ment. This is intended to test the abilities of the text-to-\n',
140-
b'speech program.\n',
141-
b'And now we are back to\n',
142-
b'normal content flowing in\n',
143-
b'\n',
144-
b'\x0cPage 3\n',
145-
b'\n',
146-
b'This is the page header\n',
147-
b'two columns. Let\xe2\x80\x99s start a\n',
148-
b'numbered list:\n',
149-
b'1. Line 1 of item 1\n',
150-
b'Line 2 of item 1\n',
151-
b'Line 3 of item 1\n',
152-
b'2. Line 1 of item 2\n',
153-
b'Line 2 of item 2\n',
154-
b'Line 3 of item 2\n',
155-
b'\n',
156-
b'And now we are going to\n',
157-
b'see how a second page\n',
158-
b'sequence is handled.\n',
159-
b'\n',
160-
b'\x0cThis is the page header\n',
161-
b'Apache FOP (Formatting\n',
162-
b'Objects Processor) is a\n',
163-
b'print formatter driven by\n',
164-
b'XSL \n',
165-
b'formatting objects\n',
166-
b'(XSL-FO) and an output\n',
167-
b'independent formatter1. It\n',
168-
b'is a Java application that\n',
169-
b'reads a formatting object\n',
170-
b'(FO) tree and renders the\n',
171-
b'\n',
172-
b'Page 4\n',
173-
b'resulting pages to a spe-\n',
174-
b'cified output.\n',
175-
b'\n',
176-
b'Header 1.1 Header 1.2\n',
177-
b'Cell 1.1\n',
178-
b'Cell 2.1\n',
179-
b'\n',
180-
b'Cell 1.2\n',
181-
b'Cell 2.2\n',
182-
b'\n',
183-
b'Apache FOP (Formatting\n',
184-
b'Objects Processor) est\n',
185-
b'une application de mise en\n',
186-
b'page de documents res-\n',
187-
b'pectant le standard XSL-\n',
188-
b'\n',
189-
b'See the FOP website for more information\n',
190-
b'\n',
191-
b'\x0cThis is the page header\n',
192-
b'Page 5\n',
193-
b'FO. \xc3\x80 partir d\xe2\x80\x99un document\n',
194-
b'va effectue une mise en\n',
195-
b'au format XSL-FO, cette\n',
196-
b'page et renvoie un docu-\n',
197-
b'application \xc3\xa9crite en Ja-\n',
198-
b'ment pr\xc3\xaat pour impression.\n',
199-
b'This fo:block element spans all the columns of the docu-\n',
200-
b'ment. This is intended to test the abilities of the text-to-\n',
201-
b'speech program.\n',
202-
b'And now we are back to\n',
203-
b'normal content flowing in\n',
204-
b'two columns. Let\xe2\x80\x99s start a\n',
205-
b'numbered list:\n',
206-
b'1. Line 1 of item 1\n',
207-
b'Line 2 of item 1\n',
208-
b'\n',
209-
b'Line 3 of item 1\n',
210-
b'2. Line 1 of item 2\n',
211-
b'Line 2 of item 2\n',
212-
b'Line 3 of item 2\n',
213-
b'\n',
214-
b'The end of the document\n',
215-
b'has now been reached.\n',
216-
b'\n',
217-
b'\x0c'
218-
]
219-
220-
apache_fop_expected_2020 = [
221-
b'This is the page header\n',
222-
b'\n',
223-
b'(There\xe2\x80\x99s another page se-\n',
224-
b'quence below.)\n',
225-
b'\n',
226-
b'About Apache FOP\n',
227-
b'It is a print formatter driv-\n',
228-
b'en by XSL formatting ob-\n',
229-
b'jects (XSL-FO) and an out-\n',
230-
b'format-\n',
231-
b'put \n',
232-
b'\n',
233-
b'independent \n',
234-
b'\n',
235-
b'Page 1\n',
236-
b'ter1. FOP has a nice logo:\n',
237-
b'\n',
238-
b'Header 1.1 Header 1.2\n',
239-
b'\n',
240-
b'Cell 1.1\n',
241-
b'\n',
242-
b'Cell 1.2\n',
243-
b'\n',
244-
b'See the FOP website for more information\n',
245-
b'\n',
246-
b'\x0cThis is the page header\n',
247-
b'\n',
248-
b'Page 2\n',
249-
b'\n',
250-
b'Cell 2.2\n',
251-
b'\n',
252-
b'Cell 2.1\n',
253-
b'\n',
254-
b'Header 1.1 Header 1.2\n',
255-
b'\n',
256-
b'(XSL-FO) and an output in-\n',
257-
b'dependent formatter. It is a\n',
258-
b'Java application that reads\n',
259-
b'a formatting object (FO)\n',
260-
b'tree and renders the res-\n',
261-
b'ulting pages to a specified\n',
262-
b'output.\n',
263-
b'\n',
264-
b'Apache FOP (Formatting\n',
265-
b'Objects Processor) is a\n',
266-
b'print formatter driven by\n',
267-
b'XSL \n',
268-
b'formatting objects\n',
269-
b'This fo:block element spans all the columns of the docu-\n',
270-
b'ment. This is intended to test the abilities of the text-to-\n',
271-
b'speech program.\n',
272-
b'And now we are back to\n',
273-
b'normal content flowing in\n',
274-
b'\n',
275-
b'\x0cPage 3\n',
276-
b'\n',
277-
b'This is the page header\n',
278-
b'\n',
279-
b'two columns. Let\xe2\x80\x99s start a\n',
280-
b'numbered list:\n',
281-
b'1. Line 1 of item 1\n',
282101
b'Line 2 of item 1\n',
283-
b'Line 3 of item 1\n',
284-
b'2. Line 1 of item 2\n',
285-
b'Line 2 of item 2\n',
286-
b'Line 3 of item 2\n',
287-
b'\n',
288-
b'And now we are going to\n',
289-
b'see how a second page\n',
290-
b'sequence is handled.\n',
291-
b'\n',
292-
b'\x0cThis is the page header\n',
293-
b'\n',
294-
b'Apache FOP (Formatting\n',
295-
b'Objects Processor) is a\n',
296-
b'print formatter driven by\n',
297-
b'XSL \n',
298-
b'formatting objects\n',
299-
b'(XSL-FO) and an output\n',
300-
b'independent formatter1. It\n',
301-
b'is a Java application that\n',
302-
b'reads a formatting object\n',
303-
b'(FO) tree and renders the\n',
304-
b'\n',
305-
b'Page 4\n',
306-
b'\n',
307-
b'resulting pages to a spe-\n',
308-
b'cified output.\n',
309-
b'\n',
310-
b'Header 1.1 Header 1.2\n',
311-
b'\n',
312-
b'Cell 1.1\n',
313-
b'Cell 2.1\n',
314-
b'\n',
315-
b'Cell 1.2\n',
316-
b'Cell 2.2\n',
317-
b'\n',
318102
b'Apache FOP (Formatting\n',
319103
b'Objects Processor) est\n',
320104
b'une application de mise en\n',
321-
b'page de documents res-\n',
322-
b'pectant le standard XSL-\n',
323-
b'\n',
324-
b'See the FOP website for more information\n',
325-
b'\n',
326-
b'\x0cThis is the page header\n',
327-
b'\n',
328-
b'Page 5\n',
329-
b'\n',
330-
b'FO. \xc3\x80 partir d\xe2\x80\x99un document\n',
331-
b'va effectue une mise en\n',
332-
b'au format XSL-FO, cette\n',
333-
b'page et renvoie un docu-\n',
334-
b'ment pr\xc3\xaat pour impression.\n',
335-
b'application \xc3\xa9crite en Ja-\n',
336-
b'This fo:block element spans all the columns of the docu-\n',
337-
b'ment. This is intended to test the abilities of the text-to-\n',
338-
b'speech program.\n',
339-
b'And now we are back to\n',
340-
b'normal content flowing in\n',
341-
b'two columns. Let\xe2\x80\x99s start a\n',
342-
b'numbered list:\n',
343-
b'1. Line 1 of item 1\n',
344-
b'Line 2 of item 1\n',
345-
b'\n',
346-
b'Line 3 of item 1\n',
347-
b'2. Line 1 of item 2\n',
348-
b'Line 2 of item 2\n',
349-
b'Line 3 of item 2\n',
350-
b'\n',
351105
b'The end of the document\n',
352106
b'has now been reached.\n',
353-
b'\n',
354-
b'\x0c',
355107
]

0 commit comments

Comments
 (0)