Skip to content

Commit e19cdf3

Browse files
committed
test(excel): add regression test for XLSX markdown vs plain output (#405)
Verifies excel_to_markdown produces table syntax with pipe delimiters and separator rows, while excel_to_text produces plain space-separated content without markdown formatting.
1 parent 0e9b8ee commit e19cdf3

File tree

1 file changed

+40
-1
lines changed

1 file changed

+40
-1
lines changed

crates/kreuzberg/tests/xlsx_metadata_extraction_test.rs

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
//! End-to-end integration test for XLSX metadata extraction
22
#![cfg(feature = "excel")]
33

4-
use kreuzberg::extraction::excel::read_excel_file;
4+
use kreuzberg::extraction::excel::{excel_to_markdown, excel_to_text, read_excel_file};
55

66
#[test]
77
fn test_xlsx_full_metadata_extraction() {
@@ -144,3 +144,42 @@ fn test_xlsx_excel_solver_extreme_dimensions_no_oom() {
144144
);
145145
println!(" Successfully handled dimension A1:XFD1048575 without OOM");
146146
}
147+
148+
/// Regression test for #405: XLSX extraction with output_format=Markdown
/// should produce markdown tables with pipe delimiters and separator rows,
/// not plain space-separated text.
///
/// The fixture `test_documents/xlsx/excel_multi_sheet.xlsx` is looked up two
/// directory levels above this crate's manifest directory. When the fixture
/// is not present the test prints a notice and returns early instead of
/// failing.
#[test]
fn test_xlsx_markdown_vs_plain_output() {
    // Walk up two levels from the crate manifest directory to reach the
    // directory that holds `test_documents/`.
    let workspace_root = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
        .parent()
        .and_then(std::path::Path::parent)
        .expect("CARGO_MANIFEST_DIR should have two parent directories");
    let test_file = workspace_root.join("test_documents/xlsx/excel_multi_sheet.xlsx");

    if !test_file.exists() {
        println!("Skipping test: Test file not found at {:?}", test_file);
        return;
    }

    let file_path = test_file.to_str().expect("File path should be valid UTF-8");
    let workbook = read_excel_file(file_path).expect("Should extract XLSX successfully");

    // excel_to_markdown should produce tables with | delimiters
    let md_content = excel_to_markdown(&workbook);
    assert!(
        md_content.contains("| "),
        "Markdown output should contain table pipe delimiters"
    );
    // A separator row has both a pipe and a dash run on the same line; a bare
    // "---" anywhere in the output would not prove a table was rendered.
    assert!(
        md_content
            .lines()
            .any(|line| line.contains('|') && line.contains("---")),
        "Markdown output should contain separator rows"
    );

    // excel_to_text should produce space-separated text (no pipes)
    let text_content = excel_to_text(&workbook);
    // Guard against a vacuous pass: an empty string trivially has no pipes.
    assert!(
        !text_content.trim().is_empty(),
        "Plain text output should not be empty"
    );
    assert!(
        !text_content.contains("| "),
        "Plain text output should not contain table pipe delimiters"
    );
}

0 commit comments

Comments
 (0)