Skip to content

Commit cadda72

Browse files
authored
feat: add fuzz testing for Excel reading and improve error handling in XlsxSaxAnalyser and CSvReadExecutor (#555)
* feat: add fuzz testing for Excel reading and improve error handling in XlsxSaxAnalyser * feat: add fuzz testing for Excel reading and improve error handling in XlsxSaxAnalyser * feat: add ignore setting to avoid additional input files * feat: add tests for benign error tolerance in CSV reading
1 parent 5afa7e2 commit cadda72

File tree

8 files changed

+338
-29
lines changed

8 files changed

+338
-29
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,3 +40,6 @@ website/versioned_sidebars
4040
website/versions.json
4141
website/pnpm-lock.yaml
4242
website/yarn.lock
43+
**/.cifuzz-corpus/
44+
**/fuzz_corpus/
45+
**/resources/**/fuzz/*Inputs/

fastexcel/pom.xml

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
<?xml version="1.0" encoding="UTF-8"?>
22
<project xmlns="http://maven.apache.org/POM/4.0.0"
3-
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4-
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
3+
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4+
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
55
<modelVersion>4.0.0</modelVersion>
66

77
<parent>
@@ -132,6 +132,11 @@
132132
</exclusion>
133133
</exclusions>
134134
</dependency>
135+
<dependency>
136+
<groupId>com.code-intelligence</groupId>
137+
<artifactId>jazzer-junit</artifactId>
138+
<scope>test</scope>
139+
</dependency>
135140
</dependencies>
136141

137142
<build>

fastexcel/src/main/java/cn/idev/excel/analysis/csv/CsvExcelReadExecutor.java

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import java.io.IOException;
1818
import java.io.InputStream;
1919
import java.io.InputStreamReader;
20+
import java.io.UncheckedIOException;
2021
import java.nio.file.Files;
2122
import java.util.ArrayList;
2223
import java.util.Iterator;
@@ -93,6 +94,19 @@ public void execute() {
9394
if (log.isDebugEnabled()) {
9495
log.debug("Custom stop!", e);
9596
}
97+
} catch (UncheckedIOException e) {
98+
// Apache Commons CSV may throw UncheckedIOException wrapping an IOException when the input
99+
// contains truncated quoted fields or reaches EOF unexpectedly. Treat such cases as benign
100+
// and end the current sheet gracefully; otherwise, rethrow as analysis exception.
101+
if (isBenignCsvParseException(e)) {
102+
if (log.isDebugEnabled()) {
103+
log.debug("CSV parse finished early due to benign parse error: {}", e.getMessage());
104+
} else if (log.isWarnEnabled()) {
105+
log.warn("CSV parse finished early due to benign parse error.");
106+
}
107+
} else {
108+
throw new ExcelAnalysisException(e);
109+
}
96110
}
97111

98112
// The last sheet is read
@@ -204,4 +218,29 @@ private void dealRecord(CSVRecord record, int rowIndex) {
204218
csvReadContext.csvReadSheetHolder().setRowIndex(rowIndex);
205219
csvReadContext.analysisEventProcessor().endRow(csvReadContext);
206220
}
221+
222+
/**
223+
* Determine whether an UncheckedIOException from Commons CSV is benign, i.e., caused by
224+
* truncated quoted fields or early EOF while parsing an encapsulated token. In such cases
225+
* we should stop reading the current sheet gracefully rather than failing the whole read.
226+
*/
227+
private static boolean isBenignCsvParseException(Throwable t) {
228+
Throwable cur = t;
229+
while (cur != null) {
230+
if (cur instanceof IOException) {
231+
String msg = cur.getMessage();
232+
if (msg != null) {
233+
// Messages observed from Apache Commons CSV
234+
if (msg.contains("EOF reached before encapsulated token finished")
235+
|| msg.contains("encapsulated token finished")
236+
|| msg.contains("Unexpected EOF in quoted field")
237+
|| msg.contains("Unclosed quoted field")) {
238+
return true;
239+
}
240+
}
241+
}
242+
cur = cur.getCause();
243+
}
244+
return false;
245+
}
207246
}

fastexcel/src/main/java/cn/idev/excel/analysis/v07/XlsxSaxAnalyser.java

Lines changed: 23 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import cn.idev.excel.enums.CellExtraTypeEnum;
99
import cn.idev.excel.exception.ExcelAnalysisException;
1010
import cn.idev.excel.exception.ExcelAnalysisStopSheetException;
11+
import cn.idev.excel.exception.ExcelCommonException;
1112
import cn.idev.excel.metadata.CellExtra;
1213
import cn.idev.excel.read.metadata.ReadSheet;
1314
import cn.idev.excel.read.metadata.holder.xlsx.XlsxReadWorkbookHolder;
@@ -30,6 +31,7 @@
3031
import javax.xml.parsers.SAXParserFactory;
3132
import lombok.extern.slf4j.Slf4j;
3233
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
34+
import org.apache.poi.openxml4j.exceptions.NotOfficeXmlFileException;
3335
import org.apache.poi.openxml4j.opc.OPCPackage;
3436
import org.apache.poi.openxml4j.opc.PackageAccess;
3537
import org.apache.poi.openxml4j.opc.PackagePart;
@@ -210,26 +212,31 @@ private void analysisCtSheetMap(XSSFReader xssfReader, XlsxReadWorkbookHolder xl
210212

211213
private OPCPackage readOpcPackage(XlsxReadWorkbookHolder xlsxReadWorkbookHolder, InputStream decryptedStream)
212214
throws Exception {
213-
if (decryptedStream == null && xlsxReadWorkbookHolder.getFile() != null) {
214-
return OPCPackage.open(xlsxReadWorkbookHolder.getFile());
215-
}
216-
if (xlsxReadWorkbookHolder.getMandatoryUseInputStream()) {
215+
try {
216+
if (decryptedStream == null && xlsxReadWorkbookHolder.getFile() != null) {
217+
return OPCPackage.open(xlsxReadWorkbookHolder.getFile());
218+
}
219+
if (xlsxReadWorkbookHolder.getMandatoryUseInputStream()) {
220+
if (decryptedStream != null) {
221+
return OPCPackage.open(decryptedStream);
222+
} else {
223+
return OPCPackage.open(xlsxReadWorkbookHolder.getInputStream());
224+
}
225+
}
226+
File readTempFile = FileUtils.createCacheTmpFile();
227+
xlsxReadWorkbookHolder.setTempFile(readTempFile);
228+
File tempFile = new File(readTempFile.getPath(), UUID.randomUUID() + ".xlsx");
217229
if (decryptedStream != null) {
218-
return OPCPackage.open(decryptedStream);
230+
FileUtils.writeToFile(tempFile, decryptedStream, false);
219231
} else {
220-
return OPCPackage.open(xlsxReadWorkbookHolder.getInputStream());
232+
FileUtils.writeToFile(
233+
tempFile, xlsxReadWorkbookHolder.getInputStream(), xlsxReadWorkbookHolder.getAutoCloseStream());
221234
}
235+
return OPCPackage.open(tempFile, PackageAccess.READ);
236+
} catch (NotOfficeXmlFileException | InvalidFormatException e) {
237+
// Wrap as a common, expected format error for callers/tests to handle gracefully
238+
throw new ExcelCommonException("Invalid OOXML/zip format: " + e.getMessage(), e);
222239
}
223-
File readTempFile = FileUtils.createCacheTmpFile();
224-
xlsxReadWorkbookHolder.setTempFile(readTempFile);
225-
File tempFile = new File(readTempFile.getPath(), UUID.randomUUID() + ".xlsx");
226-
if (decryptedStream != null) {
227-
FileUtils.writeToFile(tempFile, decryptedStream, false);
228-
} else {
229-
FileUtils.writeToFile(
230-
tempFile, xlsxReadWorkbookHolder.getInputStream(), xlsxReadWorkbookHolder.getAutoCloseStream());
231-
}
232-
return OPCPackage.open(tempFile, PackageAccess.READ);
233240
}
234241

235242
@Override
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
package cn.idev.excel.csv;
2+
3+
import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
4+
import static org.junit.jupiter.api.Assertions.assertThrows;
5+
import cn.idev.excel.FastExcelFactory;
6+
import cn.idev.excel.exception.ExcelCommonException;
7+
import cn.idev.excel.read.builder.CsvReaderBuilder;
8+
import java.io.ByteArrayInputStream;
9+
import java.io.IOException;
10+
import java.io.InputStream;
11+
import java.nio.charset.StandardCharsets;
12+
import java.util.List;
13+
import org.junit.jupiter.api.Test;
14+
15+
/**
16+
* Tests that truncated/unfinished quoted fields in CSV are treated as benign and
17+
* end the current sheet gracefully without throwing to the caller.
18+
*/
19+
class CsvBenignErrorToleranceTest {
20+
21+
@Test
22+
void shouldNotThrowOnUnclosedQuotedField_EOFBenign() {
23+
// Given: a CSV with an unclosed quoted field that triggers Commons CSV EOF inside quotes
24+
String csv = "col1,col2\n\"unfinished,2"; // second line has an unclosed quoted field
25+
ByteArrayInputStream in = new ByteArrayInputStream(csv.getBytes(StandardCharsets.UTF_8));
26+
27+
// When / Then: reading should complete without throwing any exception
28+
assertDoesNotThrow(() -> {
29+
CsvReaderBuilder builder = FastExcelFactory.read(in).csv();
30+
// Use sync read to drive the pipeline end-to-end
31+
List<Object> rows = builder.doReadSync();
32+
// No strict assertion on content; important is no exception is thrown
33+
// and the reader finishes gracefully.
34+
});
35+
}
36+
37+
@Test
38+
void shouldRethrowOnNonBenignUncheckedIOException() {
39+
// Given: a CSV stream that throws a non-benign IOException during read
40+
String csv = "a,b\n1,2\n3,4\n";
41+
byte[] data = csv.getBytes(StandardCharsets.UTF_8);
42+
InputStream throwing = new InputStream() {
43+
int idx = 0;
44+
45+
@Override
46+
public int read() throws IOException {
47+
if (idx >= data.length) {
48+
return -1;
49+
}
50+
// After consuming some bytes, simulate an IO failure with a non-benign message
51+
if (idx > data.length / 2) {
52+
throw new IOException("Simulated IO failure");
53+
}
54+
return data[idx++];
55+
}
56+
57+
@Override
58+
public int read(byte[] b, int off, int len) throws IOException {
59+
// Fallback to single-byte reads to trigger our error logic reliably
60+
int i = read();
61+
if (i == -1) {
62+
return -1;
63+
}
64+
b[off] = (byte) i;
65+
return 1;
66+
}
67+
};
68+
69+
// When / Then: the pipeline should convert UncheckedIOException into ExcelAnalysisException
70+
assertThrows(ExcelCommonException.class, () -> {
71+
FastExcelFactory.read(throwing).csv().doReadSync();
72+
});
73+
}
74+
75+
@Test
76+
void shouldReadWellFormedCsvNormally() {
77+
String csv = "a,b\n1,2\n3,4\n";
78+
ByteArrayInputStream in = new ByteArrayInputStream(csv.getBytes(StandardCharsets.UTF_8));
79+
assertDoesNotThrow(() -> {
80+
CsvReaderBuilder builder = FastExcelFactory.read(in).csv();
81+
List<Object> rows = builder.doReadSync();
82+
});
83+
}
84+
}
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
package cn.idev.excel.exception;
2+
3+
import static org.junit.jupiter.api.Assertions.assertThrows;
4+
import static org.junit.jupiter.api.Assertions.assertTrue;
5+
import cn.idev.excel.analysis.v07.XlsxSaxAnalyser;
6+
import cn.idev.excel.context.xlsx.DefaultXlsxReadContext;
7+
import cn.idev.excel.context.xlsx.XlsxReadContext;
8+
import cn.idev.excel.read.metadata.ReadWorkbook;
9+
import cn.idev.excel.support.ExcelTypeEnum;
10+
import java.io.ByteArrayInputStream;
11+
import java.io.File;
12+
import java.io.FileOutputStream;
13+
import java.nio.charset.StandardCharsets;
14+
import java.nio.file.Files;
15+
import java.util.zip.ZipOutputStream;
16+
import org.junit.jupiter.api.Test;
17+
18+
/**
19+
* Tests for XlsxSaxAnalyser.readOpcPackage error handling: it should wrap
20+
* POI's NotOfficeXmlFileException/InvalidFormatException into ExcelCommonException
21+
* with a message containing "Invalid OOXML/zip format".
22+
*/
23+
public class XlsxSaxAnalyserReadOpcPackageTest {
24+
25+
@Test
26+
void invalidInputStream_mandatoryUseInputStream_throwsExcelCommonException() {
27+
ReadWorkbook rw = new ReadWorkbook();
28+
rw.setInputStream(new ByteArrayInputStream("not-xlsx".getBytes(StandardCharsets.UTF_8)));
29+
rw.setMandatoryUseInputStream(true);
30+
XlsxReadContext ctx = new DefaultXlsxReadContext(rw, ExcelTypeEnum.XLSX);
31+
32+
ExcelCommonException ex = assertThrows(ExcelCommonException.class, () -> new XlsxSaxAnalyser(ctx, null));
33+
assertTrue(ex.getMessage() != null && ex.getMessage().toLowerCase().contains("invalid ooxml/zip format"));
34+
}
35+
36+
@Test
37+
void invalidFile_throwsExcelCommonException() throws Exception {
38+
File tmp = File.createTempFile("invalid-ooxml", ".xlsx");
39+
try {
40+
Files.write(tmp.toPath(), new byte[] {1, 2, 3, 4});
41+
ReadWorkbook rw = new ReadWorkbook();
42+
rw.setFile(tmp);
43+
XlsxReadContext ctx = new DefaultXlsxReadContext(rw, ExcelTypeEnum.XLSX);
44+
45+
ExcelCommonException ex = assertThrows(ExcelCommonException.class, () -> new XlsxSaxAnalyser(ctx, null));
46+
assertTrue(ex.getMessage() != null && ex.getMessage().toLowerCase().contains("invalid ooxml/zip format"));
47+
} finally {
48+
try {
49+
Files.deleteIfExists(tmp.toPath());
50+
} catch (Exception ignore) {
51+
}
52+
}
53+
}
54+
55+
@Test
56+
void decryptedStreamProvided_throwsExcelCommonException() {
57+
ReadWorkbook rw = new ReadWorkbook();
58+
// do not set file/inputStream; pass decryptedStream directly
59+
XlsxReadContext ctx = new DefaultXlsxReadContext(rw, ExcelTypeEnum.XLSX);
60+
ByteArrayInputStream decrypted = new ByteArrayInputStream("still-not-xlsx".getBytes(StandardCharsets.UTF_8));
61+
62+
ExcelCommonException ex = assertThrows(ExcelCommonException.class, () -> new XlsxSaxAnalyser(ctx, decrypted));
63+
assertTrue(ex.getMessage() != null && ex.getMessage().toLowerCase().contains("invalid ooxml/zip format"));
64+
}
65+
66+
@Test
67+
void emptyZipFile_throwsExcelCommonException() throws Exception {
68+
File tmp = File.createTempFile("empty-zip", ".xlsx");
69+
try (FileOutputStream fos = new FileOutputStream(tmp);
70+
ZipOutputStream zos = new ZipOutputStream(fos)) {
71+
// write an empty zip with no entries
72+
}
73+
try {
74+
ReadWorkbook rw = new ReadWorkbook();
75+
rw.setFile(tmp);
76+
XlsxReadContext ctx = new DefaultXlsxReadContext(rw, ExcelTypeEnum.XLSX);
77+
ExcelCommonException ex = assertThrows(ExcelCommonException.class, () -> new XlsxSaxAnalyser(ctx, null));
78+
assertTrue(ex.getMessage() != null && ex.getMessage().toLowerCase().contains("invalid ooxml/zip format"));
79+
} finally {
80+
try {
81+
Files.deleteIfExists(tmp.toPath());
82+
} catch (Exception ignore) {
83+
}
84+
}
85+
}
86+
87+
@Test
88+
void inputStream_nonMandatoryUseTempFileBranch_throwsExcelCommonException() {
89+
ReadWorkbook rw = new ReadWorkbook();
90+
rw.setInputStream(new ByteArrayInputStream("not-xlsx".getBytes(StandardCharsets.UTF_8)));
91+
// mandatoryUseInputStream unset or false -> will write to temp file and then open
92+
rw.setMandatoryUseInputStream(false);
93+
XlsxReadContext ctx = new DefaultXlsxReadContext(rw, ExcelTypeEnum.XLSX);
94+
95+
ExcelCommonException ex = assertThrows(ExcelCommonException.class, () -> new XlsxSaxAnalyser(ctx, null));
96+
assertTrue(ex.getMessage() != null && ex.getMessage().toLowerCase().contains("invalid ooxml/zip format"));
97+
}
98+
}
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
package cn.idev.excel.fuzz;
2+
3+
import cn.idev.excel.FastExcelFactory;
4+
import cn.idev.excel.read.builder.ExcelReaderBuilder;
5+
import com.code_intelligence.jazzer.junit.FuzzTest;
6+
import java.io.ByteArrayInputStream;
7+
import java.io.InputStream;
8+
import lombok.SneakyThrows;
9+
10+
/**
11+
* Fuzzes the generic read path with arbitrary bytes to discover parsing issues.
12+
*/
13+
public class ExcelReadFuzzTest {
14+
15+
private static final int MAX_SIZE = 1_000_000; // 1MB guard to avoid OOM / long loops
16+
17+
@SneakyThrows
18+
@FuzzTest
19+
void fuzzRead(byte[] data) {
20+
if (data == null || data.length == 0 || data.length > MAX_SIZE) {
21+
return; // Ignore trivial or oversized inputs
22+
}
23+
try (InputStream in = new ByteArrayInputStream(data)) {
24+
ExcelReaderBuilder builder = FastExcelFactory.read(in);
25+
// Always attempt to read first sheet synchronously if possible
26+
builder.sheet().doReadSync();
27+
} catch (Throwable t) {
28+
// Jazzer treats uncaught exceptions as findings. We allow RuntimeExceptions that
29+
// indicate expected format errors, but still surface anything else.
30+
// Swallow common benign exceptions to reduce noise.
31+
String msg = t.getMessage();
32+
if (msg != null) {
33+
String lower = msg.toLowerCase();
34+
if (lower.contains("invalid")
35+
|| lower.contains("zip")
36+
|| lower.contains("format")
37+
|| lower.contains("end of central directory")) {
38+
return; // expected parse/format issues
39+
}
40+
}
41+
throw t;
42+
}
43+
}
44+
}

0 commit comments

Comments
 (0)