import { describe, it, expect } from "@jest/globals"
import ExcelJS from "exceljs"
import { extractTextFromXLSX } from "../extract-text-from-xlsx"

/**
 * Splits extracted text into lines and keeps only those that carry data:
 * non-blank lines that do not start with the "---" sheet-header marker.
 *
 * @param {string} text - Output returned by extractTextFromXLSX.
 * @returns {string[]} The remaining data lines, in their original order.
 */
const getDataLines = (text) => text.split("\n").filter((line) => line.trim() && !line.startsWith("---"))

/**
 * Tests for extractTextFromXLSX.
 *
 * Each test builds an in-memory ExcelJS workbook, passes it to the extractor,
 * and asserts on the returned text. The expectations below pin the output
 * format these tests rely on: a "--- Sheet: <name> ---" header per sheet and
 * tab-separated cell values per row.
 */
describe("extractTextFromXLSX", () => {
  describe("basic functionality", () => {
    it("should extract text with proper formatting", async () => {
      const workbook = new ExcelJS.Workbook()
      const worksheet = workbook.addWorksheet("Sheet1")

      worksheet.getCell("A1").value = "Hello"
      worksheet.getCell("B1").value = "World"
      worksheet.getCell("A2").value = "Test"
      worksheet.getCell("B2").value = 123

      const result = await extractTextFromXLSX(workbook)

      expect(result).toContain("--- Sheet: Sheet1 ---")
      expect(result).toContain("Hello\tWorld")
      expect(result).toContain("Test\t123")
    })

    it("should skip rows with no content", async () => {
      const workbook = new ExcelJS.Workbook()
      const worksheet = workbook.addWorksheet("Sheet1")

      worksheet.getCell("A1").value = "Row 1"
      // Row 2 is completely empty
      worksheet.getCell("A3").value = "Row 3"

      const result = await extractTextFromXLSX(workbook)

      expect(result).toContain("Row 1")
      expect(result).toContain("Row 3")
      // An empty row would show up as two newlines separated only by tabs.
      expect(result).not.toMatch(/\n\t*\n/)
    })
  })

  describe("sheet handling", () => {
    it("should process multiple sheets", async () => {
      const workbook = new ExcelJS.Workbook()

      const sheet1 = workbook.addWorksheet("First Sheet")
      sheet1.getCell("A1").value = "Sheet 1 Data"

      const sheet2 = workbook.addWorksheet("Second Sheet")
      sheet2.getCell("A1").value = "Sheet 2 Data"

      const result = await extractTextFromXLSX(workbook)

      expect(result).toContain("--- Sheet: First Sheet ---")
      expect(result).toContain("Sheet 1 Data")
      expect(result).toContain("--- Sheet: Second Sheet ---")
      expect(result).toContain("Sheet 2 Data")
    })

    it("should skip hidden sheets", async () => {
      const workbook = new ExcelJS.Workbook()

      const visibleSheet = workbook.addWorksheet("Visible Sheet")
      visibleSheet.getCell("A1").value = "Visible Data"

      const hiddenSheet = workbook.addWorksheet("Hidden Sheet")
      hiddenSheet.getCell("A1").value = "Hidden Data"
      hiddenSheet.state = "hidden"

      const result = await extractTextFromXLSX(workbook)

      // Neither the header nor the cell content of the hidden sheet may leak.
      expect(result).toContain("--- Sheet: Visible Sheet ---")
      expect(result).toContain("Visible Data")
      expect(result).not.toContain("--- Sheet: Hidden Sheet ---")
      expect(result).not.toContain("Hidden Data")
    })

    it("should skip very hidden sheets", async () => {
      const workbook = new ExcelJS.Workbook()

      const visibleSheet = workbook.addWorksheet("Visible Sheet")
      visibleSheet.getCell("A1").value = "Visible Data"

      const veryHiddenSheet = workbook.addWorksheet("Very Hidden Sheet")
      veryHiddenSheet.getCell("A1").value = "Very Hidden Data"
      veryHiddenSheet.state = "veryHidden"

      const result = await extractTextFromXLSX(workbook)

      expect(result).toContain("--- Sheet: Visible Sheet ---")
      expect(result).toContain("Visible Data")
      expect(result).not.toContain("--- Sheet: Very Hidden Sheet ---")
      expect(result).not.toContain("Very Hidden Data")
    })
  })

  describe("formatCellValue logic", () => {
    it("should handle null and undefined values", async () => {
      const workbook = new ExcelJS.Workbook()
      const worksheet = workbook.addWorksheet("Sheet1")

      worksheet.getCell("A1").value = "Before"
      worksheet.getCell("A2").value = null
      worksheet.getCell("A3").value = undefined
      worksheet.getCell("A4").value = "After"

      const result = await extractTextFromXLSX(workbook)

      expect(result).toContain("Before")
      expect(result).toContain("After")
      // Should handle null/undefined as empty strings
      const dataLines = getDataLines(result)
      expect(dataLines).toHaveLength(2) // Only 'Before' and 'After' should create content
    })

    it("should format dates correctly", async () => {
      const workbook = new ExcelJS.Workbook()
      const worksheet = workbook.addWorksheet("Sheet1")

      // A date-only ISO string is parsed as midnight UTC.
      // NOTE(review): the expectation below therefore presumes the extractor
      // renders dates in UTC rather than local time - confirm against the implementation.
      const testDate = new Date("2023-12-25")
      worksheet.getCell("A1").value = testDate

      const result = await extractTextFromXLSX(workbook)

      expect(result).toContain("2023-12-25")
    })

    it("should handle error values", async () => {
      const workbook = new ExcelJS.Workbook()
      const worksheet = workbook.addWorksheet("Sheet1")

      worksheet.getCell("A1").value = { error: "#DIV/0!" }

      const result = await extractTextFromXLSX(workbook)

      expect(result).toContain("[Error: #DIV/0!]")
    })

    it("should handle rich text", async () => {
      const workbook = new ExcelJS.Workbook()
      const worksheet = workbook.addWorksheet("Sheet1")

      worksheet.getCell("A1").value = {
        richText: [{ text: "Hello " }, { text: "World", font: { bold: true } }],
      }

      const result = await extractTextFromXLSX(workbook)

      // The rich-text runs are expected to be concatenated without formatting.
      expect(result).toContain("Hello World")
    })

    it("should handle hyperlinks", async () => {
      const workbook = new ExcelJS.Workbook()
      const worksheet = workbook.addWorksheet("Sheet1")

      worksheet.getCell("A1").value = {
        text: "Google",
        hyperlink: "https://www.google.com",
      }

      const result = await extractTextFromXLSX(workbook)

      expect(result).toContain("Google (https://www.google.com)")
    })

    it("should handle formulas with and without results", async () => {
      const workbook = new ExcelJS.Workbook()
      const worksheet = workbook.addWorksheet("Sheet1")

      worksheet.getCell("A1").value = { formula: "A2+A3", result: 30 }
      worksheet.getCell("A2").value = { formula: "SUM(B1:B10)" }

      const result = await extractTextFromXLSX(workbook)

      expect(result).toContain("30") // Formula with result
      expect(result).toContain("[Formula: SUM(B1:B10)]") // Formula without result
    })
  })

  describe("row limit handling", () => {
    // NOTE(review): this only exercises the under-limit path; the truncation
    // branch is not covered because the limit's value is not visible from this file.
    it("should respect the ROW_LIMIT constant", async () => {
      const workbook = new ExcelJS.Workbook()
      const worksheet = workbook.addWorksheet("Sheet1")

      // Add a reasonable number of rows for testing
      for (let i = 1; i <= 100; i++) {
        worksheet.getCell(`A${i}`).value = `Row ${i}`
      }

      const result = await extractTextFromXLSX(workbook)

      expect(result).toContain("Row 1")
      expect(result).toContain("Row 100")
      // Should not contain truncation message for 100 rows (under limit)
      expect(result).not.toContain("[... truncated at row")
    })
  })

  describe("edge cases", () => {
    it("should handle empty workbook", async () => {
      const workbook = new ExcelJS.Workbook()
      workbook.addWorksheet("Empty Sheet")

      const result = await extractTextFromXLSX(workbook)

      // The header must be present and must be the only non-whitespace output.
      expect(result).toContain("--- Sheet: Empty Sheet ---")
      expect(result.trim()).toBe("--- Sheet: Empty Sheet ---")
    })

    it("should handle workbook with only empty cells", async () => {
      const workbook = new ExcelJS.Workbook()
      const worksheet = workbook.addWorksheet("Sheet1")

      // Set cells but leave them empty
      worksheet.getCell("A1").value = ""
      worksheet.getCell("B1").value = ""

      const result = await extractTextFromXLSX(workbook)

      expect(result).toContain("--- Sheet: Sheet1 ---")
      // Should not contain any data rows since empty strings don't count as content
      const lines = getDataLines(result)
      expect(lines).toHaveLength(0)
    })
  })

  describe("function overloads", () => {
    it("should work with workbook objects", async () => {
      const workbook = new ExcelJS.Workbook()
      const worksheet = workbook.addWorksheet("Test")
      worksheet.getCell("A1").value = "Test Data"

      const result = await extractTextFromXLSX(workbook)

      expect(result).toContain("Test Data")
    })

    it("should reject invalid file paths", async () => {
      // A string argument is treated as a file path; a missing file must reject.
      await expect(extractTextFromXLSX("/non/existent/file.xlsx")).rejects.toThrow()
    })
  })
})