@@ -61,11 +61,6 @@ def test_partition_docx_from_filename(
6161 assert {element .metadata .detection_origin for element in elements } == {"docx" }
6262
6363
64- def test_partition_docx_from_filename_with_metadata_filename (mock_document_file_path : str ):
65- elements = partition_docx (mock_document_file_path , metadata_filename = "test" )
66- assert all (element .metadata .filename == "test" for element in elements )
67-
68-
6964def test_partition_docx_with_spooled_file (
7065 mock_document_file_path : str , expected_elements : list [Text ]
7166):
@@ -92,16 +87,6 @@ def test_partition_docx_from_file(mock_document_file_path: str, expected_element
9287 assert element .metadata .filename is None
9388
9489
95- def test_partition_docx_from_file_with_metadata_filename (
96- mock_document_file_path : str , expected_elements : list [Text ]
97- ):
98- with open (mock_document_file_path , "rb" ) as f :
99- elements = partition_docx (file = f , metadata_filename = "test" )
100- assert elements == expected_elements
101- for element in elements :
102- assert element .metadata .filename == "test"
103-
104-
10590def test_partition_docx_uses_file_path_when_both_are_specified (
10691 mock_document_file_path : str , expected_elements : list [Text ]
10792):
@@ -221,21 +206,37 @@ def test_partition_docx_detects_lists():
221206 assert sum (1 for e in elements if isinstance (e , ListItem )) == 10
222207
223208
224- def test_partition_docx_from_filename_exclude_metadata ():
209+ # -- `include_metadata` arg ----------------------------------------------------------------------
210+
211+
212+ def test_partition_docx_from_filename_excludes_metadata_when_so_instructed ():
225213 elements = partition_docx (example_doc_path ("handbook-1p.docx" ), include_metadata = False )
214+ assert all (e .metadata .to_dict () == {} for e in elements )
226215
227- assert elements [0 ].metadata .filetype is None
228- assert elements [0 ].metadata .page_name is None
229- assert elements [0 ].metadata .filename is None
230216
217+ def test_partition_docx_from_file_excludes_metadata_when_so_instructed ():
218+ with open (example_doc_path ("simple.docx" ), "rb" ) as f :
219+ assert all (
220+ element .metadata .to_dict () == {}
221+ for element in partition_docx (file = f , include_metadata = False )
222+ )
231223
232- def test_partition_docx_from_file_exclude_metadata (mock_document_file_path : str ):
233- with open (mock_document_file_path , "rb" ) as f :
234- elements = partition_docx (file = f , include_metadata = False )
235224
236- assert elements [0 ].metadata .filetype is None
237- assert elements [0 ].metadata .page_name is None
238- assert elements [0 ].metadata .filename is None
225+ # -- .metadata.filename --------------------------------------------------------------------------
226+
227+
228+ def test_partition_docx_from_filename_prefers_metadata_filename_when_provided ():
229+ elements = partition_docx (example_doc_path ("simple.docx" ), metadata_filename = "test" )
230+ assert all (element .metadata .filename == "test" for element in elements )
231+
232+
233+ def test_partition_docx_from_file_prefers_metadata_filename_when_provided ():
234+ with open (example_doc_path ("simple.docx" ), "rb" ) as f :
235+ elements = partition_docx (file = f , metadata_filename = "test" )
236+ assert all (element .metadata .filename == "test" for element in elements )
237+
238+
239+ # -- .metadata.last_modified ---------------------------------------------------------------------
239240
240241
241242def test_partition_docx_metadata_date (mocker : MockFixture ):
@@ -307,6 +308,9 @@ def test_partition_docx_from_file_without_metadata_date():
307308 assert elements [0 ].metadata .last_modified is None
308309
309310
311+ # ------------------------------------------------------------------------------------------------
312+
313+
310314def test_get_emphasized_texts_from_paragraph (
311315 opts_args : dict [str , Any ], expected_emphasized_texts : list [dict [str , str ]]
312316):
0 commit comments