@@ -56,28 +56,40 @@ def expected_elements():
5656 ]
5757
5858
59- def test_partition_doc_with_filename (mock_document , expected_elements , tmpdir , capsys ):
59+ def test_partition_doc_from_filename (mock_document , expected_elements , tmpdir , capsys ):
6060 docx_filename = os .path .join (tmpdir .dirname , "mock_document.docx" )
6161 doc_filename = os .path .join (tmpdir .dirname , "mock_document.doc" )
6262 mock_document .save (docx_filename )
6363 convert_office_doc (docx_filename , tmpdir .dirname , "doc" )
64-
6564 elements = partition_doc (filename = doc_filename )
6665 assert elements == expected_elements
6766 assert elements [0 ].metadata .filename == "mock_document.doc"
6867 assert elements [0 ].metadata .file_directory == tmpdir .dirname
69-
7068 assert capsys .readouterr ().out == ""
7169 assert capsys .readouterr ().err == ""
7270
7371
74- def test_partition_doc_matches_partition_docx (mock_document , expected_elements , tmpdir ):
72+ def test_partition_doc_from_filename_with_metadata_filename (
73+ mock_document ,
74+ expected_elements ,
75+ tmpdir ,
76+ ):
7577 docx_filename = os .path .join (tmpdir .dirname , "mock_document.docx" )
7678 doc_filename = os .path .join (tmpdir .dirname , "mock_document.doc" )
7779 mock_document .save (docx_filename )
7880 convert_office_doc (docx_filename , tmpdir .dirname , "doc" )
7981
80- partition_doc (filename = doc_filename ) == partition_docx (filename = docx_filename )
82+ elements = partition_doc (filename = doc_filename , metadata_filename = "test" )
83+ assert elements == expected_elements
84+ assert all (element .metadata .filename == "test" for element in elements )
85+
86+
87+ def test_partition_doc_matches_partition_docx (mock_document , expected_elements , tmpdir ):
88+ docx_filename = os .path .join (tmpdir .dirname , "mock_document.docx" )
89+ doc_filename = os .path .join (tmpdir .dirname , "mock_document.doc" )
90+ mock_document .save (docx_filename )
91+ convert_office_doc (docx_filename , tmpdir .dirname , "doc" )
92+ assert partition_doc (filename = doc_filename ) == partition_docx (filename = docx_filename )
8193
8294
8395def test_partition_raises_with_missing_doc (mock_document , expected_elements , tmpdir ):
@@ -87,7 +99,7 @@ def test_partition_raises_with_missing_doc(mock_document, expected_elements, tmp
8799 partition_doc (filename = doc_filename )
88100
89101
90- def test_partition_doc_with_file (mock_document , expected_elements , tmpdir , capsys ):
102+ def test_partition_doc_from_file (mock_document , expected_elements , tmpdir , capsys ):
91103 docx_filename = os .path .join (tmpdir .dirname , "mock_document.docx" )
92104 doc_filename = os .path .join (tmpdir .dirname , "mock_document.doc" )
93105 mock_document .save (docx_filename )
@@ -96,9 +108,22 @@ def test_partition_doc_with_file(mock_document, expected_elements, tmpdir, capsy
96108 with open (doc_filename , "rb" ) as f :
97109 elements = partition_doc (file = f )
98110 assert elements == expected_elements
99-
100111 assert capsys .readouterr ().out == ""
101112 assert capsys .readouterr ().err == ""
113+ for element in elements :
114+ assert element .metadata .filename is None
115+
116+
117+ def test_partition_doc_from_file_with_metadata_filename (mock_document , tmpdir ):
118+ docx_filename = os .path .join (tmpdir .dirname , "mock_document.docx" )
119+ doc_filename = os .path .join (tmpdir .dirname , "mock_document.doc" )
120+ mock_document .save (docx_filename )
121+ convert_office_doc (docx_filename , tmpdir .dirname , "doc" )
122+
123+ with open (doc_filename , "rb" ) as f :
124+ elements = partition_doc (file = f , metadata_filename = "test" )
125+ for element in elements :
126+ assert element .metadata .filename == "test"
102127
103128
104129def test_partition_doc_raises_with_both_specified (mock_document , tmpdir ):
@@ -116,7 +141,7 @@ def test_partition_doc_raises_with_neither():
116141 partition_doc ()
117142
118143
119- def test_partition_doc_with_file_exclude_metadata (mock_document , tmpdir ):
144+ def test_partition_doc_from_file_exclude_metadata (mock_document , tmpdir ):
120145 docx_filename = os .path .join (tmpdir .dirname , "mock_document.docx" )
121146 doc_filename = os .path .join (tmpdir .dirname , "mock_document.doc" )
122147 mock_document .save (docx_filename )
@@ -130,7 +155,7 @@ def test_partition_doc_with_file_exclude_metadata(mock_document, tmpdir):
130155 assert elements [0 ].metadata .filename is None
131156
132157
133- def test_partition_doc_with_filename_exclude_metadata (mock_document , tmpdir ):
158+ def test_partition_doc_from_filename_exclude_metadata (mock_document , tmpdir ):
134159 docx_filename = os .path .join (tmpdir .dirname , "mock_document.docx" )
135160 doc_filename = os .path .join (tmpdir .dirname , "mock_document.doc" )
136161 mock_document .save (docx_filename )
0 commit comments