|
1 | 1 | //! End-to-end integration test for XLSX metadata extraction |
2 | 2 | #![cfg(feature = "excel")] |
3 | 3 |
|
4 | | -use kreuzberg::extraction::excel::read_excel_file; |
| 4 | +use kreuzberg::extraction::excel::{excel_to_markdown, excel_to_text, read_excel_file}; |
5 | 5 |
|
6 | 6 | #[test] |
7 | 7 | fn test_xlsx_full_metadata_extraction() { |
@@ -144,3 +144,42 @@ fn test_xlsx_excel_solver_extreme_dimensions_no_oom() { |
144 | 144 | ); |
145 | 145 | println!(" Successfully handled dimension A1:XFD1048575 without OOM"); |
146 | 146 | } |
| 147 | + |
/// Regression test for #405: XLSX extraction with output_format=Markdown
/// should produce markdown tables with pipe delimiters and separator rows,
/// not plain space-separated text.
///
/// The fixture `test_documents/xlsx/excel_multi_sheet.xlsx` is resolved
/// relative to the directory two levels above this crate's manifest
/// directory. When the fixture is not present the test prints a notice and
/// returns early instead of failing.
#[test]
fn test_xlsx_markdown_vs_plain_output() {
    // `ancestors()` yields the path itself first, so index 2 is the
    // grandparent of the manifest directory.
    let workspace_root = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
        .ancestors()
        .nth(2)
        .expect("CARGO_MANIFEST_DIR should have a grandparent directory (the workspace root)");
    let test_file = workspace_root.join("test_documents/xlsx/excel_multi_sheet.xlsx");

    if !test_file.exists() {
        println!("Skipping test: Test file not found at {:?}", test_file);
        return;
    }

    let file_path = test_file.to_str().expect("File path should be valid UTF-8");
    let workbook = read_excel_file(file_path).expect("Should extract XLSX successfully");

    // excel_to_markdown should produce tables with | delimiters
    let md_content = excel_to_markdown(&workbook);
    assert!(
        md_content.contains("| "),
        "Markdown output should contain table pipe delimiters, got:\n{}",
        md_content
    );
    assert!(
        md_content.contains("---"),
        "Markdown output should contain separator rows, got:\n{}",
        md_content
    );

    // excel_to_text should produce space-separated text (no pipes)
    let text_content = excel_to_text(&workbook);
    assert!(
        !text_content.contains("| "),
        "Plain text output should not contain table pipe delimiters, got:\n{}",
        text_content
    );
}
0 commit comments