Skip to content

Commit 7327b10

Browse files
authored
Merge pull request #362 from crishoj/fix/issue-359-docx-list-whitespace
fix(docx): add regression test for #359 list run whitespace
2 parents b2b3c51 + f6d8783 commit 7327b10

File tree

2 files changed

+33
-0
lines changed

2 files changed

+33
-0
lines changed
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
//! Regression test for https://github.com/kreuzberg-dev/kreuzberg/issues/359
2+
//!
3+
//! DOCX list items with multiple text runs should preserve whitespace between runs.
4+
//! e.g. "Sermocination ypsiliform" must not become "Sermocinationypsiliform".
5+
6+
#![cfg(feature = "office")]
7+
8+
use kreuzberg::{ExtractionConfig, extract_file};
9+
10+
#[tokio::test]
11+
async fn test_issue_359_docx_list_run_whitespace() {
12+
let workspace_root = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
13+
.parent()
14+
.expect("parent")
15+
.parent()
16+
.expect("workspace root");
17+
let test_file = workspace_root.join("test_documents/docx/issue_359_list_whitespace.docx");
18+
19+
if !test_file.exists() {
20+
println!("Skipping test: {:?} not found", test_file);
21+
return;
22+
}
23+
24+
let result = extract_file(&test_file, None, &ExtractionConfig::default())
25+
.await
26+
.expect("Should extract DOCX successfully");
27+
28+
assert!(
29+
result.content.contains("Sermocination ypsiliform"),
30+
"Expected 'Sermocination ypsiliform' with space between runs, got: {:?}",
31+
result.content
32+
);
33+
}
8.96 KB
Binary file not shown.

0 commit comments

Comments
 (0)