1212
# Absolute path of the directory containing this test module; the example
# documents used below are located relative to it.
DIRECTORY = pathlib.Path(__file__).parent.resolve()

# Elements the tests below expect from partitioning "fake-html-lang-de.html".
EXPECTED_OUTPUT_LANGUAGE_DE = [
    Title(text="Jahresabschluss zum Geschäftsjahr vom 01.01.2020 bis zum 31.12.2020"),
]
18+
1519
1620def test_partition_html_from_filename ():
1721 directory = os .path .join (DIRECTORY , ".." , ".." , "example-docs" )
@@ -47,14 +51,16 @@ def test_partition_html_from_filename_raises_encoding_error(filename, encoding,
4751
@pytest.mark.parametrize(
    "filename",
    [
        "example-10k-utf-16.html",
        "example-steelJIS-datasheet-utf-16.html",
        "fake-html-lang-de.html",
    ],
)
def test_partition_html_from_filename_default_encoding(filename):
    """Partition an example document by path without passing an encoding.

    Checks that at least one element is produced, that every element records
    the bare file name in its metadata, and, for the German sample, that the
    result equals EXPECTED_OUTPUT_LANGUAGE_DE.
    """
    docs_dir = os.path.join(DIRECTORY, "..", "..", "example-docs")
    elements = partition_html(filename=os.path.join(docs_dir, filename))

    assert len(elements) > 0
    assert all(element.metadata.filename == filename for element in elements)

    if filename == "fake-html-lang-de.html":
        assert elements == EXPECTED_OUTPUT_LANGUAGE_DE
5864
5965
6066def test_partition_html_from_filename_metadata_false ():
@@ -108,13 +114,15 @@ def test_partition_html_from_file_raises_encoding_error(filename, encoding, erro
108114
@pytest.mark.parametrize(
    "filename",
    [
        "example-10k-utf-16.html",
        "example-steelJIS-datasheet-utf-16.html",
        "fake-html-lang-de.html",
    ],
)
def test_partition_html_from_file_default_encoding(filename):
    """Partition an example document from a text-mode file object.

    No encoding is passed to either ``open`` or ``partition_html``. Checks
    that at least one element is produced and, for the German sample, that the
    result equals EXPECTED_OUTPUT_LANGUAGE_DE.
    """
    # Keep the parametrized name intact: rebinding `filename` to the joined
    # path made the comparison below always false, so the German expectation
    # was silently never checked.
    filename_path = os.path.join(DIRECTORY, "..", "..", "example-docs", filename)
    # The encoding is deliberately left unspecified; this test exercises the
    # default-encoding path.
    with open(filename_path) as f:
        elements = partition_html(file=f)
    assert len(elements) > 0
    if filename == "fake-html-lang-de.html":
        assert elements == EXPECTED_OUTPUT_LANGUAGE_DE
118126
119127
120128@pytest .mark .parametrize (
@@ -133,13 +141,15 @@ def test_partition_html_from_file_rb_raises_encoding_error(filename, encoding, e
133141
@pytest.mark.parametrize(
    "filename",
    [
        "example-10k-utf-16.html",
        "example-steelJIS-datasheet-utf-16.html",
        "fake-html-lang-de.html",
    ],
)
def test_partition_html_from_file_rb_default_encoding(filename):
    """Partition an example document from a binary-mode file object.

    No encoding is passed to ``partition_html``. Checks that at least one
    element is produced and, for the German sample, that the result equals
    EXPECTED_OUTPUT_LANGUAGE_DE.
    """
    # Keep the parametrized name intact: rebinding `filename` to the joined
    # path made the comparison below always false, so the German expectation
    # was silently never checked.
    filename_path = os.path.join(DIRECTORY, "..", "..", "example-docs", filename)
    with open(filename_path, "rb") as f:
        elements = partition_html(file=f)
    assert len(elements) > 0
    if filename == "fake-html-lang-de.html":
        assert elements == EXPECTED_OUTPUT_LANGUAGE_DE
143153
144154
145155def test_partition_html_from_text ():
0 commit comments